- 勉強会のトピックをグラフにして視覚化する話を書いた(こちら)
- 似たような処理にAssociation rule法がある
- こちらでは、「親用語」に対して複数の「子用語」を与えるという形で入力を作り、それに基づいてグラフ化したが、Association rule法の方では「親子」の区別をなくして作業
- 過去2か月のこのブログの取り扱い内容をテキストファイルにして二種類の方法で示してみよう
my.knowledge.rule <- function(file,support=0.1,confidence=0.05,from=NULL,to=NULL,target.date=NULL,target.node=NULL,max.edge=NULL,alpha=1){
library(arulesViz)
tmp <- as.matrix(read.table(file,sep="\t",na.string="")
)
dates <- as.numeric(tmp[,1])
if(is.null(from))from<-min(dates)
if(is.null(to))to<-max(dates)
targets <- which(dates >= from & dates <= to)
d <- tmp[targets,-1]
n <- length(d[,1])
d.list <- list()
for(i in 1:n){
tmp <- d[i,]
d.list[[i]] <- tmp[which(!is.na(tmp))]
}
names(d.list) <- paste("Tr",c(1:n), sep = "")
trans <- as(d.list, "transactions")
rules <- apriori(trans, parameter=list(support=support, confidence=confidence))
return(list(trans=trans,rules=rules))
}
kn <- my.knowledge.rule("ryamada22.txt",support=0.01,confidence=0.05)
subrules2 <- sample(kn$rules,100)
plot(subrules2, method = "graph", control = list(type = "items"))
my.knowledge.graph <- function(file,from=NULL,to=NULL,target.date=NULL,target.node=NULL,max.edge=NULL,alpha=1,layout=layout.kamada.kawai){ library(igraph)
tmp <- as.matrix(read.table(file,sep="\t",na.string="",fill=TRUE)
)
dates <- as.numeric(tmp[,1])
dt <- tmp[,-1]
n <- length(dt[,1])
d.list <- list()
for(i in 1:n){
tmp2 <- dt[i,]
d.list[[i]] <- c(tmp2[which(!is.na(tmp2))])
}
if(is.null(from))from<-min(dates)
if(is.null(to))to<-max(dates)
targets <- which(dates >= from & dates <= to)
targets <- 1:length(d.list)
e <- matrix(0,0,2)
for(i in targets){
if(length(d.list[[i]]) > 1){
for(j in 2:length(d.list[[i]])){
e <- rbind(e,c(d.list[[i]][1],d.list[[i]][j]))
}
}
}
e.uniq <- unique(e)
e.weight <- rep(0,length(e.uniq[,1]))
for(i in 1:length(e.uniq[,1])){
e.weight[i] <- length(which(e[,1]==e.uniq[i,1] & e[,2]==e.uniq[i,2]))
}
selected.e <- 1:length(e.weight)
if(is.null(max.edge)){
selected.e <- which(e.weight >= quantile(e.weight,1-alpha))
}else{
sort.w <- sort(e.weight,decreasing=TRUE)
tmp.max.edge <- min(max.edge,length(e.weight))
tmp.w <- sort.w[tmp.max.edge]
selected.e <- which(e.weight >= tmp.w)
}
library(igraph)
g <- graph.edgelist(e.uniq[selected.e,],directed=FALSE)
E(g)$weight <- e.weight
v <- V(g)$name
col <- rep(rgb(0.3,0.3,0.3,alpha=0.3) ,length(v))
if(!is.null(target.date)){
tmp.v <- which(dates[targets]==target.date)
col[which(v %in% d[tmp.v,1])] <- rgb(1,0.3,0.3,alpha=0.5)
}
if(!is.null(target.node)){
col[which(v %in% target.node)] <- rgb(1,0.3,0.3,alpha=0.5)
}
e.w <- E(g)$weight
plot(g,layout=layout,vertex.color=col,edge.width = e.w)
}
my.knowledge.graph("ryamada22.txt",layout=layout.kamada.kawai)