グラフ的ノート

  • 勉強会のトピックをグラフにして視覚化する話を書いた(こちら)
  • 似たような処理にAssociation rule法がある
  • こちらでは、「親用語」に対して複数の「子用語」を与えるという形で入力を作り、それに基づいてグラフ化したが、Association rule法の方では「親子」の区別をなくして作業
  • 過去2か月のこのブログの取り扱い内容をテキストファイルにして二種類の方法で示してみよう


  • Association rule法
my.knowledge.rule <- function(file,support=0.1,confidence=0.05,from=NULL,to=NULL,target.date=NULL,target.node=NULL,max.edge=NULL,alpha=1){
	library(arulesViz)
	tmp <- as.matrix(read.table(file,sep="\t",na.string="")
)
	dates <- as.numeric(tmp[,1])
	if(is.null(from))from<-min(dates)
	if(is.null(to))to<-max(dates)
	targets <- which(dates >= from & dates <= to)
	d <- tmp[targets,-1]
	# transactionオブジェクトを作る
	n <- length(d[,1])
	d.list <- list()
	for(i in 1:n){
		tmp <- d[i,]
		d.list[[i]] <- tmp[which(!is.na(tmp))]
	}
	#d.list <- apply(d,1,function(v){list(v[which(!is.na(v))])})

	names(d.list) <- paste("Tr",c(1:n), sep = "")
	trans <- as(d.list, "transactions")

	## analyze transactions
	#summary(trans)
	#image(trans)
	rules <- apriori(trans, parameter=list(support=support, confidence=confidence))
	#rules
	#plot(rules)
	return(list(trans=trans,rules=rules))
}
kn <- my.knowledge.rule("ryamada22.txt",support=0.01,confidence=0.05)
#plot(kn$rules, method = "graph", control = list(type = "items"))
subrules2 <- sample(kn$rules,100)
plot(subrules2, method = "graph", control = list(type = "items"))
my.knowledge.graph <- function(file,from=NULL,to=NULL,target.date=NULL,target.node=NULL,max.edge=NULL,alpha=1,layout=layout.kamada.kawai){	library(igraph)
	tmp <- as.matrix(read.table(file,sep="\t",na.string="",fill=TRUE)
)
	dates <- as.numeric(tmp[,1])
	dt <- tmp[,-1]
	n <- length(dt[,1])
	d.list <- list()
	for(i in 1:n){
		tmp2 <- dt[i,]
		d.list[[i]] <- c(tmp2[which(!is.na(tmp2))])
	}

	if(is.null(from))from<-min(dates)
	if(is.null(to))to<-max(dates)
	targets <- which(dates >= from & dates <= to)
	targets <- 1:length(d.list)
	e <- matrix(0,0,2)
	for(i in targets){
		if(length(d.list[[i]]) > 1){
			for(j in 2:length(d.list[[i]])){
				e <- rbind(e,c(d.list[[i]][1],d.list[[i]][j]))
				
			}
			
		}
	}
	# 入力ファイルはひたすら思いつくままに書くので、エッジ重複が出る
	# それを省く
	e.uniq <- unique(e)
	e.weight <- rep(0,length(e.uniq[,1]))
	
	for(i in 1:length(e.uniq[,1])){
		e.weight[i] <- length(which(e[,1]==e.uniq[i,1] & e[,2]==e.uniq[i,2]))
	}
	selected.e <- 1:length(e.weight)
	if(is.null(max.edge)){
		selected.e <- which(e.weight >= quantile(e.weight,1-alpha))
	}else{
		sort.w <- sort(e.weight,decreasing=TRUE)
		tmp.max.edge <- min(max.edge,length(e.weight))
		tmp.w <- sort.w[tmp.max.edge]
		#print(tmp.w)
		selected.e <- which(e.weight >= tmp.w)
		#print(selected.e)
	}
#print(e.uniq[selected.e,])
	library(igraph)
	# 無向グラフオブジェクトにする
	g <- graph.edgelist(e.uniq[selected.e,],directed=FALSE)
	E(g)$weight <- e.weight
	v <- V(g)$name
	
	# ノードの色を付ける
	col <- rep(rgb(0.3,0.3,0.3,alpha=0.3) ,length(v))
	if(!is.null(target.date)){
		# ターゲットノードの文字列を指定して、そのノードの色を変える
		tmp.v <- which(dates[targets]==target.date)
		col[which(v %in% d[tmp.v,1])] <- rgb(1,0.3,0.3,alpha=0.5) 
		#print(dates)
		#print(dim(d))
		#print(tmp.v)
	}
	if(!is.null(target.node)){
		col[which(v %in% target.node)] <- rgb(1,0.3,0.3,alpha=0.5) 

	}

	e.w <- E(g)$weight
	#e.w.2 <- e.w
	#g2 <- g
	#E(g2)$weight <- e.w-1
	#e.w.3 <- as.numeric(e.w.2 > 0)
	plot(g,layout=layout,vertex.color=col,edge.width = e.w)
}
my.knowledge.graph("ryamada22.txt",layout=layout.kamada.kawai)