저장소

R 무방향성 데이터분석_연관규칙 본문

교육/빅데이터 청년인재_경희대 R

R 무방향성 데이터분석_연관규칙

HB HB 2019. 7. 27. 11:30

# Install arulesViz only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz")
}
library(arulesViz)

# Load the Groceries transactions data set and print its summary.
data("Groceries")
summary(Groceries)

# Groceries is transaction data, so it cannot be viewed directly; it has to be
# converted to a data frame first, e.g. gro <- as(Groceries, "data.frame").
head(Groceries)



## Generate association rules and check the main ones

# apriori() is the function that builds rules of the form
# "when A is bought, B is bought too".
rules <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.5)
)
rules

# Sort by lift and look at the top 3 rules, then the top 5.
inspect(head(sort(rules, by = "lift"), n = 3))
inspect(head(sort(rules, by = "lift"), n = 5))

# Default plot of all rules.
plot(rules)

# Graph plot restricted to the 10 rules with the highest lift.
subrules2 <- head(sort(rules, by = "lift"), n = 10)
plot(subrules2, method = "graph", control = list(type = "items"))



# Coerce the transactions to a data frame and open it in the data viewer.
gro <- as(Groceries, "data.frame")
View(gro)



# Install arules only when it is missing instead of reinstalling on every run.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)

# NOTE(review): update.packages() used to run unconditionally at this point.
# It tries to upgrade every installed package, which is unsafe in the middle
# of an analysis script while packages are already attached. Run it by hand
# in a fresh session when an upgrade is really wanted:
# update.packages()


# Load the Adult transactions data set and print its summary.
data("Adult")
summary(Adult)

# Data-frame view of the transactions. Only the first rows are printed:
# printing the whole object floods the console with one row per transaction.
Adult.df <- as(Adult, "data.frame")
head(Adult.df)

# Mine rules with apriori()'s default parameters and show the first few.
rules.adult <- apriori(Adult)
inspect(head(rules.adult))


# rhs = right-hand side, lhs = left-hand side: in "when A is bought, B is
# bought", A is the left-hand side.
# Only the two income items are allowed on the rhs; every other item defaults
# to the lhs.
rules.adult1 <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.6),
  appearance = list(
    rhs = c("income=small", "income=large"),
    default = "lhs"
  ),
  control = list(verbose = FALSE)
)

# Rank the rules by lift and show the first few.
rules.adult1.sorted <- sort(rules.adult1, by = "lift")
inspect(head(rules.adult1.sorted))

# Scatter plot and graph plot of the ranked rules.
plot(rules.adult1.sorted, method = "scatterplot")
plot(
  rules.adult1.sorted,
  method = "graph",
  control = list(type = "items", alpha = 0.5)
)

# Graph plot of the 10 rules with the highest lift.
a10 <- head(sort(rules.adult1, by = "lift"), n = 10)
plot(a10, method = "graph", control = list(type = "items", alpha = 1))

# Open the help page for plot.
?plot


# Turn the aggregated Titanic contingency table into a data frame with one
# row per Class/Sex/Age/Survived combination plus its count (Freq).
titan.df <- as.data.frame(Titanic)
head(titan.df)
summary(Titanic)

# Expand to one row per passenger by repeating each combination Freq times.
# This is done in one vectorised step instead of growing a matrix with cbind()
# inside a loop. The columns are built as factors explicitly: on R >= 4.0
# as.data.frame() no longer converts strings to factors, and the rule mining
# further down expects categorical (factor) columns.
titanic <- as.data.frame(
  lapply(
    titan.df[1:4],
    function(column) factor(rep(as.character(column), times = titan.df$Freq))
  )
)
names(titanic) <- names(titan.df)[1:4]
summary(titanic)
head(titanic)
tail(titanic)


# Build the rules with apriori() and look at them with inspect().
rules.all <- apriori(titanic)

# Print numbers with 3 significant digits from here on.
options(digits = 3)
inspect(rules.all)


# Rules whose right-hand side (rhs) is the survival outcome.
# Settings changed: minimum rule length = 2, minimum support = 0.005,
# minimum confidence = 0.8.
rules <- apriori(
  titanic,
  control = list(verbose = FALSE),
  parameter = list(minlen = 2, support = 0.005, confidence = 0.8),
  appearance = list(
    rhs = c("Survived=No", "Survived=Yes"),
    default = "lhs"
  )
)

# Sort by lift.
rules.sorted <- sort(rules, by = "lift")

# Check the rules.
inspect(rules.sorted)



# Find redundant rules.
# subset.matrix[i, j] is TRUE when the items of rule i are a subset of the
# items of rule j. sparse = FALSE requests an ordinary dense logical matrix:
# current arules versions return a sparse matrix by default, which does not
# work with the lower.tri() logical assignment below.
subset.matrix <- is.subset(rules.sorted, rules.sorted, sparse = FALSE)

# Blank out the diagonal and the lower triangle so each rule is only compared
# with rules that come before it in the lift ordering, never with itself.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- FALSE

# A rule is redundant when at least one earlier rule is a subset of it.
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
which(redundant)

# Keep only the non-redundant rules.
rules.pruned <- rules.sorted[!redundant]

# Default values: measure = c("support", "confidence"), shading = "lift".
plot(rules.all)

# Original note: shading assigned according to rule number.
# NOTE(review): confirm what shading = "order" means in arulesViz.
plot(rules.all, shading = "order")

# The darker the shading, the higher the lift.
plot(rules.all, method = "grouped")

# The same plots for the lift-sorted survival rules.
plot(rules.sorted)
plot(rules.sorted, method = "grouped")

# Parallel coordinates plot.
plot(
  rules.sorted,
  method = "paracoord",
  control = list(reorder = TRUE)
)


# Interactive plot: support against lift, shaded by confidence.
# NOTE(review): newer arulesViz releases may deprecate `interactive` in favour
# of the `engine` argument - confirm against the installed version.
plot(
  rules.sorted,
  measure = c("support", "lift"),
  shading = "confidence",
  interactive = TRUE
)


# Matrix-based visualisation.
plot(rules.sorted, method = "matrix", measure = "lift")

plot(
  rules.sorted,
  method = "matrix",
  measure = c("lift", "confidence"),
  control = list(reorder = TRUE)
)

plot(
  rules.sorted,
  method = "matrix3D",
  measure = "lift",
  control = list(reorder = TRUE)
)


##파일로저장

#write



# Exercise: mine Adult with support 0.7 and confidence 0.8.
adult78 <- apriori(
  Adult,
  parameter = list(support = 0.7, confidence = 0.8)
)


# Read the basket file as comma-separated transactions, one basket per line.
# Without sep = "," read.transactions() splits on whitespace, so the lines of
# a CSV file are not broken into their individual items.
mybasket <- read.transactions("mybasket.csv", format = "basket", sep = ",")
summary(mybasket)
head(mybasket)
str(mybasket)

# Item frequencies, printed and plotted.
itemFrequency(mybasket)
itemFrequencyPlot(mybasket)


# Rules with apriori()'s default parameters; show the last few.
myb <- apriori(mybasket)
inspect(tail(myb))

# Rules with support 0.1 and confidence 0.0.
# (A stray trailing comma in the original call was removed.)
my10 <- apriori(mybasket, parameter = list(support = 0.1, confidence = 0.0))

# Item frequencies are computed from the transactions; the original passed
# the rules object my10 here, which itemFrequency() cannot handle.
sort(itemFrequency(mybasket), decreasing = TRUE)
sort(itemFrequency(mybasket))

# Rules with "snack" on the left-hand side and "bakery" on the right-hand
# side. apriori() mines transactions, so it is given mybasket rather than the
# rules object my10. default = "none" keeps every other item out of the rules.
# NOTE(review): this call uses apriori()'s default thresholds; pass the same
# `parameter` list as my10 if those thresholds were intended.
rules <- apriori(
  mybasket,
  control = list(verbose = FALSE),
  appearance = list(rhs = "bakery", lhs = "snack", default = "none")
)

# The original called inspect9(), a typo for inspect().
inspect(my10)


#---------------------------------------------------- to ask about
# Read the basket file as comma-separated transactions.
mybasket.trans <- read.transactions(
  "mybasket.csv",
  format = "basket",
  sep = ","
)
summary(mybasket.trans)
itemFrequency(mybasket.trans)

# Rules with support 0.1 and confidence 0.0.
mybasket.rules <- apriori(
  mybasket.trans,
  parameter = list(support = 0.1, confidence = 0.0)
)
summary(mybasket.rules)

# Item frequencies, largest first.
sort(itemFrequency(mybasket.trans), decreasing = TRUE)

# Rules with "clothes" on the left-hand side and "snack" on the right.
inspect(subset(mybasket.rules, subset = lhs %in% "clothes" & rhs %in% "snack"))

# Rules whose left-hand side contains both "clothes" and "snack".
# The argument was misspelled `subst`, so the filter was never applied.
inspect(subset(mybasket.rules, subset = lhs %in% "clothes" & lhs %in% "snack"))


# Install sna only when it is missing instead of reinstalling on every run.
if (!requireNamespace("sna", quietly = TRUE)) {
  install.packages("sna")
}
library(sna)

# Adjacency matrix of a 5-node network (one row per node).
amatrix <- rbind(
  c(0, 1, 1, 1, 0),
  c(1, 0, 0, 1, 0),
  c(1, 0, 0, 0, 0),
  c(1, 1, 0, 0, 1),
  c(0, 0, 0, 1, 0)
)
amatrix

# gden: network density. 0.5 means only 50% of the ties are connected.
gden(amatrix)

# degree: degree centrality, i.e. counting the links.
degree(amatrix, gmode = "graph")

# closeness: average of the distances; often used for hub logistics centres.
closeness(amatrix, gmode = "graph")

betweenness(amatrix, gmode = "graph")

# Draw the network with the default layout, then with a circular layout.
gplot(amatrix)
gplot(amatrix, mode = "circle")

# Plot the adjacency matrix itself.
plot.sociomatrix(amatrix)

 


Comments