R Supervised Learning Algorithms: Logistic

HB · 2019. 7. 25. 15:30

### Supervised Learning Algorithms


## Support Vector Machine

auto=read.csv('autoparts.csv',header=T)

auto1=auto[auto$prod_no=='90784-76001',c(2:11)]

auto2=auto1[auto1$c_thickness<1000,]


auto2$y_faulty=ifelse((auto2$c_thickness<20)|(auto2$c_thickness>32),1,0)

head(auto2)
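The train/test split below uses random sampling, so the exact rows drawn change from run to run; fixing a seed first (the value is arbitrary) makes the split reproducible.

set.seed(1234) # arbitrary seed; any fixed value gives a reproducible split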


t_index=sample(1:nrow(auto2),size=nrow(auto2)*0.7)

train=auto2[t_index,]

test=auto2[-t_index,]

nrow(train);nrow(test)


install.packages('e1071')

library(e1071)

tune.svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
           highpressure_time,data=auto2,gamma=2^(-1:1),cost=2^(2:4))
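The call above prints the tuning summary but does not keep it; a minimal sketch (using e1071's tune-object accessors, with tuned as an illustrative name) that stores the result so the best gamma/cost pair can be reused:

tuned=tune.svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
                 highpressure_time,data=auto2,gamma=2^(-1:1),cost=2^(2:4))
summary(tuned)         # cross-validated error for every gamma/cost pair
tuned$best.parameters  # the combination with the lowest error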



# Build the model from the training data

m=svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
        highpressure_time,data=train,gamma=1,cost=16)


yhat_test=predict(m,test)

table=table(real=test$y_faulty,predict=yhat_test)

table


(table[1,1]+table[2,2])/sum(table) # accuracy (correct classification rate)
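Accuracy alone can look high here because faulty parts are rare; as a rough check (assuming both classes appear in the table), the class-wise rates can be read off the same confusion table:

table[2,2]/sum(table[2,])  # sensitivity: share of truly faulty parts predicted as faulty
table[1,1]/sum(table[1,])  # specificity: share of good parts predicted as good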



l=svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
        highpressure_time,data=train,gamma=1,cost=16,kernel='linear')

yhat_test=predict(l,test)

table=table(real=test$y_faulty,predict=yhat_test);table

(table[1,1]+table[2,2])/sum(table)


b=svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
        highpressure_time,data=train)

yhat_test=predict(b,test)

table=table(real=test$y_faulty,predict=yhat_test);table

(table[1,1]+table[2,2])/sum(table)


## ROC Curve

m=svm(factor(y_faulty)~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+mpa+load_time+
        highpressure_time,data=train,gamma=2,cost=16)

yhat_test=predict(m,test)

install.packages('Epi')

library(Epi)


ROC(test=yhat_test,stat=test$y_faulty,plot="ROC",AUC=T,main='SVM')



lhat_test=predict(l,test)

ROC(test=lhat_test,stat=test$y_faulty,plot="ROC",AUC=T,main='SVM (linear kernel)')


bhat_test=predict(b,test)

ROC(test=bhat_test,stat=test$y_faulty,plot="ROC",AUC=T,main='SVM (default parameters)')



## Prediction

new.data=data.frame(fix_time=87,a_speed=0.609,b_speed=1.715,separation=242.7,

                    s_separation=657.5,rate_terms=95,mpa=78,load_time=18.1,

                    highpressure_time=82)

predict(m,newdata=new.data)


new.data=data.frame(fix_time=c(87,85.6),a_speed=c(0.609,0.472),b_speed=c(1.715,1.685),separation=c(242.7,243.3),

                    s_separation=c(657.5,657.9),rate_terms=c(95,95),mpa=c(78,28.8),

                    load_time=c(18.1,18.2),highpressure_time=c(82,60))

predict(m,newdata=new.data)


## Support Vector Regression

auto=read.csv('autoparts.csv')

auto1=auto[auto$prod_no=='90784-76001',c(2:11)]

auto2=auto1[auto1$c_thickness<1000,]

auto2$y_faulty=ifelse((auto2$c_thickness<20)|(auto2$c_thickness>32),1,0)

t_index=sample(1:nrow(auto2),size=nrow(auto2)*0.7)

train=auto2[t_index,]

test=auto2[-t_index,]


m=svm(c_thickness~fix_time+a_speed+b_speed+separation+s_separation+

        rate_terms+mpa+load_time+highpressure_time,data=train,

      gamma=1,cost=16)

yhat_test=predict(m,test)

plot(x=test$c_thickness,y=yhat_test,main='SVR')



m2=lm(c_thickness~fix_time+a_speed+b_speed+separation+s_separation+rate_terms+
        mpa+load_time+highpressure_time,data=train)
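The linear model m2 above is never evaluated; a minimal sketch of a test-set RMSE comparison between the SVR and the linear model (yhat_lm is just an illustrative name):

yhat_lm=predict(m2,newdata=test)
sqrt(mean((test$c_thickness-yhat_test)^2))  # SVR test RMSE
sqrt(mean((test$c_thickness-yhat_lm)^2))    # linear-model test RMSE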



regression=read.csv('regression.csv',header = T)

plot(regression$x,regression$y,pch=16,xlab='x',ylab='y')


#LM

m1=lm(y~x,data=regression)

p1=predict(m1,newdata=regression)

points(regression$x,p1,col='red',pch='L')


#SVR

m2=svm(y~x,regression)

p2=predict(m2,newdata=regression)
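p2 is computed above but never drawn; to overlay the SVR fit on the same scatter plot (the colour, plotting character, and legend position are arbitrary choices):

points(regression$x,p2,col='blue',pch='S')
legend('topleft',legend=c('LM','SVR'),col=c('red','blue'),pch=c('L','S'))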



## Logistic Regression

auto=read.csv('autoparts.csv',header=T)

auto1=auto[auto$prod_no=='90784-76001',c(2:11)]

auto2=auto1[auto1$c_thickness<1000,]

auto2$y_faulty=ifelse((auto2$c_thickness<20)|(auto2$c_thickness>32),1,0)

auto2$y_faulty=as.factor(auto2$y_faulty)

table(auto2$y_faulty)

m=glm(y_faulty~fix_time+a_speed+b_speed+separation+

        s_separation+rate_terms+mpa+load_time+

        highpressure_time,data=auto2,family=binomial(logit))
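To read the fitted model, summary() gives the coefficient table, and exponentiating a coefficient gives the corresponding odds ratio (standard glm accessors):

summary(m)    # coefficient estimates and Wald tests
exp(coef(m))  # odds ratios: multiplicative change in the odds of a fault per one-unit increase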


# Build the model using only the training data

t_index=sample(1:nrow(auto2),size=nrow(auto2)*0.7)

train=auto2[t_index,]

test=auto2[-t_index,]

m=glm(y_faulty~fix_time+a_speed+b_speed+separation+

        s_separation+rate_terms+mpa+load_time+

        highpressure_time,data=train,family=binomial(logit))


# Set the classification cutoff on the fitted probabilities

yhat=ifelse(m$fitted.values>=0.5,1,0)

yhat


table=table(real=train$y_faulty,predict=yhat)

table

yhat_test=predict(m,test,type='response')
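The test-set probabilities are computed above but never evaluated; a sketch mirroring the training-set evaluation, cutting at the same 0.5 threshold (yhat_test_class and table_test are illustrative names):

yhat_test_class=ifelse(yhat_test>=0.5,1,0)
table_test=table(real=test$y_faulty,predict=yhat_test_class);table_test
(table_test[1,1]+table_test[2,2])/sum(table_test) # test-set accuracy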




## Multinomial Logistic Model: When There Are Three or More Classes

auto2$g_class=as.factor(ifelse(auto2$c_thickness<20,1,ifelse(auto2$c_thickness<32,2,3)))

table(auto2$g_class)

t_index=sample(1:nrow(auto2),size=nrow(auto2)*0.7)

train=auto2[t_index,]

test=auto2[-t_index,]

install.packages('nnet')

library(nnet)

m=multinom(g_class~fix_time+a_speed+b_speed+separation+

             s_separation+rate_terms+mpa+load_time+

             highpressure_time,data=train)

summary(m)

head(m$fitted.values)


yhat_test=predict(m,test)

table=table(real=test$g_class,predict=yhat_test)

table # confusion matrix

(table[1,1]+table[2,2]+table[3,3])/sum(table)
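If class probabilities are needed instead of hard labels, predict() on a multinom model also accepts type='probs':

head(predict(m,test,type='probs')) # per-class probabilities for the first few test rows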

 

