##### 의사결정나무

library(tree)

# Fit a classification tree that predicts Species from all other iris columns.
iris.tr <- tree(Species ~ ., iris)

# Draw the tree, then annotate the drawing with text labels.
plot(iris.tr)
text(iris.tr)



# 데이터를 7:3으로 분리 

library(party)

# Randomly label every iris row 1 or 2, with probability 0.7 and 0.3.
idx <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Label 1 (about 70% of the rows) is the training set and label 2 (about
# 30%) is the held-out test set, so the model is fitted on the larger part
# of a 7:3 split.
train.data <- iris[idx == 1, ]
test.data <- iris[idx == 2, ]


# Grow a conditional inference tree for Species on the training rows.
iris.tree <- ctree(Species ~ ., data = train.data)

# Plot the fitted tree twice: default layout first, then the "simple" layout.
plot(iris.tree)
plot(iris.tree, type = "simple")

# Cross-tabulate predicted vs. actual species on the training data.
table(predict(iris.tree), train.data$Species)

# Apply the model to the test data to check its accuracy.
test.pre <- predict(iris.tree, newdata = test.data)
table(test.pre, test.data$Species)





##### 앙상블 분석

# 랜덤포레스트

library(randomForest)

# Randomly label every iris row 1 or 2, with probability 0.7 and 0.3.
idx <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Label 1 (about 70% of the rows) is the training set and label 2 (about
# 30%) is the held-out test set, so the forest is fitted on the larger part
# of the split.
train.data <- iris[idx == 1, ]
test.data <- iris[idx == 2, ]

# Fit a 100-tree random forest for Species on the training rows, also
# requesting the proximity measure between rows.
r.f <- randomForest(Species ~ ., data = train.data, ntree = 100,
                    proximity = TRUE)

# Cross-tabulate the forest's predictions for the training rows (no newdata
# is given) against the actual training species.
table(predict(r.f), train.data$Species)

# Plot the fitted forest (error rates against the number of trees).
plot(r.f)

# Plot the importance of each predictor.
varImpPlot(r.f)

# Predict the test data and compare with the true species.
pre.rf <- predict(r.f, newdata = test.data)
table(pre.rf, test.data$Species)

# The margin is computed from the votes stored in the fitted forest, which
# belong to the training rows, so the observed classes supplied here must be
# the training labels (test labels would not line up with those votes).
# NOTE(review): recent randomForest versions take the observed classes from
# the model itself and ignore this extra argument -- confirm for the
# installed version.
plot(margin(r.f, train.data$Species))




# 성과분석

library(rpart)

library(party)

library(ROCR)


# Shuffle the rows of kyphosis so the split below is random.
x <- kyphosis[sample(nrow(kyphosis)), ]

# The first 75% of the shuffled rows train the model and the remaining rows
# evaluate it. Both subsets are taken from the shuffled copy `x`, and the
# evaluation set starts one row after the training set so no row is in both.
n.train <- floor(nrow(x) * 0.75)
x.train <- x[seq_len(n.train), ]
x.evaluate <- x[(n.train + 1):nrow(x), ]

# Fit a conditional inference forest for Kyphosis on the training rows.
x.model <- cforest(Kyphosis ~ Age + Number + Start, data = x.train)

# Predict the evaluation rows and flag which predictions match the truth.
x.evaluate$prediction <- predict(x.model, newdata = x.evaluate)
x.evaluate$correct <- x.evaluate$prediction == x.evaluate$Kyphosis

# The mean of the logical vector is the fraction classified correctly. It
# has to be inside paste(); otherwise it is handed to print() as a separate
# argument and never shows up in the message.
print(paste("% of predicted classification correct",
            mean(x.evaluate$correct)))

# Flatten the per-row responses into one vector, keep every other element
# starting at the first, and take the complement.
# NOTE(review): this assumes each row yields two class probabilities with
# the first being the "absent" class, so the result is P(present) -- confirm.
x.evaluate$probabilities <- 1 - unlist(
  treeresponse(x.model, newdata = x.evaluate),
  use.names = FALSE
)[seq(1, nrow(x.evaluate) * 2, 2)]

# Build the ROCR prediction object from the scores and the true labels.
pred <- prediction(x.evaluate$probabilities, x.evaluate$Kyphosis)

# ROC curve: true positive rate against false positive rate.
perf <- performance(pred, "tpr", "fpr")
plot(perf, main = "ROC curve", colorize = TRUE)

# Lift chart: lift against rate of positive predictions.
perf <- performance(pred, "lift", "rpp")
plot(perf, main = "lift curve", colorize = TRUE)





##### 로지스틱 회귀분석

# Fit a binomial (logistic) regression of Species on Sepal.Length and show
# the model summary.
# NOTE(review): the data frame `a` is not defined in the visible part of
# this script -- confirm where it is created before running this.
b <- glm(
  Species ~ Sepal.Length,
  data = a,
  family = binomial
)
summary(b)



# 'programing > R studio' 카테고리의 다른 글
#
# merge 함수의 all.x = TRUE  (0) 2018.09.10
# R - 기본함수 - paste / paste0  (0) 2018.08.22
# 주성분분석 사례  (0) 2018.07.29
# 계량적MDS와 비계량적MDS 예시  (0) 2018.07.29
# 시계열 분석 예제  (0) 2018.07.29
#
# + Recent posts