##### Decision Tree
library(tree)
iris.tr<-tree(Species~., iris)
plot(iris.tr)
text(iris.tr)
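# (Not in the original post) A minimal sketch of pruning the fitted tree with
# cross-validation from the same tree package; "best=4" is an illustrative
# size to be read off the cv.tree plot, not a recommendation
iris.cv<-cv.tree(iris.tr, FUN=prune.misclass)
plot(iris.cv)
iris.pruned<-prune.misclass(iris.tr, best=4)
plot(iris.pruned)
text(iris.pruned)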
# Split the data into training and test sets (7:3)
library(party)
idx<-sample(2, nrow(iris), replace=T, prob=c(0.7, 0.3))
train.data<-iris[idx==1,]
test.data<-iris[idx==2,]
iris.tree<-ctree(Species~., data=train.data)
plot(iris.tree)
plot(iris.tree, type="simple")
# Compare the predicted classes with the actual classes on the training data
table(predict(iris.tree), train.data$Species)
# Check accuracy by applying the model to the test data
test.pre<-predict(iris.tree, newdata=test.data)
table(test.pre, test.data$Species)
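# (Not in the original post) A quick accuracy figure from the same confusion
# matrix: proportion of test cases on the diagonal
sum(diag(table(test.pre, test.data$Species)))/nrow(test.data)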
##### Ensemble Analysis
# Random forest
library(randomForest)
# Resplit the data 7:3 into training and test sets
idx<-sample(2, nrow(iris), replace=T, prob=c(0.7, 0.3))
train.data<-iris[idx==1,]
test.data<-iris[idx==2,]
r.f<-randomForest(Species~., data=train.data, ntree=100, proximity=T)
table(predict(r.f), train.data$Species)
plot(r.f)
varImpPlot(r.f)
# Predict on the test data
pre.rf<-predict(r.f, newdata=test.data)
table(pre.rf, test.data$Species)
# Plot the prediction margins; margins are based on the forest's OOB votes,
# so the observed classes must come from the training data
plot(margin(r.f, train.data$Species))
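# (Not in the original post) A minimal sketch of tuning mtry with tuneRF from
# the randomForest package; ntreeTry, stepFactor and improve are illustrative
# values, not settings from the post
tuneRF(train.data[, -5], train.data$Species, ntreeTry=100,
       stepFactor=1.5, improve=0.01)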
# Performance analysis
library(rpart)
library(party)
library(ROCR)
# Shuffle the rows, then split roughly 75:25 into training and evaluation sets
x<-kyphosis[sample(1:nrow(kyphosis), nrow(kyphosis), replace=F),]
x.train<-x[1:floor(nrow(x)*0.75),]
x.evaluate<-x[(floor(nrow(x)*0.75)+1):nrow(x),]
x.model<-cforest(Kyphosis~Age+Number+Start, data=x.train)
x.evaluate$prediction<-predict(x.model, newdata=x.evaluate)
x.evaluate$correct<-x.evaluate$prediction == x.evaluate$Kyphosis
print(paste("% of predicted classifications correct:", mean(x.evaluate$correct)))
# treeresponse() returns a list of class-probability vectors in level order
# (absent, present); 1 - P(absent) gives P(present) for each observation
x.evaluate$probabilities<-1-unlist(treeresponse(x.model, newdata=x.evaluate),
use.names=F)[seq(1, nrow(x.evaluate)*2, 2)]
pred<-prediction(x.evaluate$probabilities, x.evaluate$Kyphosis)
perf<-performance(pred, "tpr", "fpr")
plot(perf, main="ROC curve", colorize=T)
perf<-performance(pred, "lift", "rpp")
plot(perf, main="lift curve", colorize=T)
##### Logistic Regression
# `a` is undefined in the original; assumed prep: keep two species for a binary response
a<-subset(iris, Species=="setosa" | Species=="versicolor")
a$Species<-factor(a$Species)
b<-glm(Species~Sepal.Length, data=a, family=binomial)
summary(b)
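# (Not in the original post) A minimal sketch of using the fitted model:
# type="response" returns the estimated probability of the second factor
# level ("versicolor"); the Sepal.Length value below is only an example
head(predict(b, type="response"))
predict(b, newdata=data.frame(Sepal.Length=5.5), type="response")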