The data come from Kaggle's King County (USA) house sales dataset and cover homes sold between May 2014 and May 2015. https://www.kaggle.com/harlfoxem/housesalesprediction
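The standardized table below comes from preprocessing steps that are not shown here. A minimal sketch of one way to produce something similar, assuming `category` is derived from hypothetical price cut points (the original class boundaries are not shown) and noting that the `re.region` zip-code grouping is not reproduced:

library(dplyr)
data <- read.csv("kc_house_data.csv")
# Hypothetical price boundaries; the original cut points may differ.
data_2 <- data %>%
  mutate(category = cut(price, breaks = c(0, 3e5, 6e5, 1e6, Inf),
                        labels = c("Slum", "Economical", "Expensive", "Luxury")))
# Standardize the numeric predictors; the head() output below is on this scale.
num.cols <- c("bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors",
              "waterfront", "view", "condition", "grade", "sqft_above",
              "sqft_basement", "yr_built", "yr_renovated",
              "sqft_living15", "sqft_lot15")
data_2[num.cols] <- scale(data_2[num.cols])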
## bedrooms bathrooms sqft_living sqft_lot floors waterfront
## 1 -0.3987279 -1.4474301 -0.9798124 -0.2283160 -0.9154058 -0.08717061
## 2 -0.3987279 0.1756026 0.5336220 -0.1898810 0.9364841 -0.08717061
## 3 -1.4739253 -1.4474301 -1.4262210 -0.1232956 -0.9154058 -0.08717061
## 4 0.6764694 1.1494223 -0.1305470 -0.2440088 -0.9154058 -0.08717061
## 5 -0.3987279 -0.1490039 -0.4354115 -0.1696495 -0.9154058 -0.08717061
## 6 0.6764694 3.0970615 3.6367068 2.0961362 -0.9154058 -0.08717061
## view condition grade sqft_above sqft_basement yr_built
## 1 -0.3057524 -0.6291723 -0.5588228 -0.7346906 -0.6586658 -0.5448852
## 2 -0.3057524 -0.6291723 -0.5588228 0.4608302 0.2451357 -0.6810627
## 3 -0.3057524 -0.6291723 -1.4095545 -1.2298053 -0.6586658 -1.2938619
## 4 -0.3057524 2.4442374 -0.5588228 -0.8916782 1.3974827 -0.2044412
## 5 -0.3057524 -0.6291723 0.2919089 -0.1308922 -0.6586658 0.5445355
## 6 -0.3057524 -0.6291723 2.8441039 2.5378966 2.7983750 1.0211570
## yr_renovated re.region sqft_living15 sqft_lot15 category
## 1 -0.2101235 E -0.9433334 -0.2607094 Economical
## 2 4.7465678 D -0.4326762 -0.1878634 Economical
## 3 -0.2101235 D 1.0701150 -0.1723713 Economical
## 4 -0.2101235 B -0.9141530 -0.2845153 Economical
## 5 -0.2101235 D -0.2721839 -0.1928443 Economical
## 6 -0.2101235 D 4.0465169 3.2654907 Economical
# Map the houses, colored by price category
library(leaflet)
m <- leaflet(data) %>% setView(lng = median(data$long), lat = median(data$lat), zoom = 9)
pal <- colorFactor(c("blue", "red", "yellow", "pink"), domain = c("Economical", "Luxury", "Expensive", "Slum"))
m %>% addTiles() %>%
  addCircleMarkers(
    radius = ~ifelse(category == "Luxury", 3, 2),
    color = ~pal(category),
    stroke = FALSE, fillOpacity = 0.8
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
# Map by `re.region`
library(leaflet)
m <- leaflet(data) %>% setView(lng = median(data$long), lat = median(data$lat), zoom = 9)
# one color per region level A-F
pal <- colorFactor(c("#A20055", "#FFFF00", "#00DD00", "#4400CC", "#CC00FF", "#888888"),
                   domain = c("A", "B", "C", "D", "E", "F"))
m %>% addTiles() %>%
  addCircleMarkers(
    radius = ~ifelse(data_2$re.region == "A", 2, 1),
    color = ~pal(data_2$re.region),
    stroke = FALSE, fillOpacity = 0.8
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
smp.size <- floor(0.8*nrow(data_2))
set.seed(516)
train.ind <- sample(seq_len(nrow(data_2)), smp.size)
train <- data_2[train.ind, ] # 80%
test <- data_2[-train.ind, ] # 20%
head(train)
## bedrooms bathrooms sqft_living sqft_lot floors waterfront
## 10188 -0.3987279 0.5002092 0.2396455 0.30079378 0.9364841 -0.08717061
## 994 -0.3987279 -0.1490039 -0.6422838 -0.18000665 -0.9154058 -0.08717061
## 11173 2.8268641 3.0970615 1.5462076 -0.15941299 0.9364841 -0.08717061
## 4737 -0.3987279 -0.4736105 -0.8491562 -0.15226677 -0.9154058 -0.08717061
## 13498 -0.3987279 0.5002092 -0.2285392 -0.33005308 0.9364841 -0.08717061
## 13082 -0.3987279 -0.4736105 -0.6096198 -0.09710087 -0.9154058 -0.08717061
## view condition grade sqft_above sqft_basement
## 10188 -0.3057524 -0.6291723 1.1426405 0.61781776 -0.6586658
## 994 -0.3057524 -0.6291723 -0.5588228 -0.36033564 -0.6586658
## 11173 -0.3057524 -0.6291723 -0.5588228 2.06693389 -0.6586658
## 4737 -0.3057524 0.9075325 0.2919089 -0.58977903 -0.6586658
## 13498 -0.3057524 -0.6291723 -0.5588228 0.09855114 -0.6586658
## 13082 -0.3057524 -0.6291723 0.2919089 -0.32410773 -0.6586658
## yr_built yr_renovated re.region sqft_living15 sqft_lot15
## 10188 0.8168906 -0.2101235 D 0.7199501 0.1421593
## 994 0.5785799 -0.2101235 E -0.5785782 -0.1821500
## 11173 0.3062247 -0.2101235 D -0.6369391 -0.1577947
## 4737 -0.2725300 -0.2101235 C -0.9287432 -0.1453424
## 13498 1.1232902 -0.2101235 E -0.3889056 -0.3998090
## 13082 0.4083579 -0.2101235 D 0.4719166 -0.1143215
## category
## 10188 Economical
## 994 Slum
## 11173 Slum
## 4737 Expensive
## 13498 Slum
## 13082 Economical
head(test)
## bedrooms bathrooms sqft_living sqft_lot floors waterfront
## 3 -1.4739253 -1.4474301 -1.4262210 -0.1232956 -0.91540583 -0.08717061
## 10 -0.3987279 0.5002092 -0.2067632 -0.2063463 0.93648411 -0.08717061
## 19 -1.4739253 -1.4474301 -0.9580363 -0.1269170 -0.91540583 -0.08717061
## 26 -0.3987279 -0.1490039 -0.4027475 -0.2513240 0.01053914 -0.08717061
## 39 0.6764694 -1.4474301 -0.9362603 -0.1697702 -0.91540583 -0.08717061
## 42 0.6764694 0.1756026 2.3301448 0.2191917 -0.91540583 -0.08717061
## view condition grade sqft_above sqft_basement yr_built
## 3 -0.3057524 -0.6291723 -1.4095545 -1.22980532 -0.65866580 -1.29386186
## 10 -0.3057524 -0.6291723 -0.5588228 0.12270307 -0.65866580 1.08924580
## 19 -0.3057524 0.9075325 -0.5588228 -0.71053870 -0.65866580 -1.70239460
## 26 -0.3057524 2.4442374 -1.4095545 -0.09466435 -0.65866580 -1.02150670
## 39 -0.3057524 -2.1658772 -0.5588228 -1.08489371 0.08697046 -0.06826364
## 42 -0.3057524 -0.6291723 0.2919089 0.98009679 3.00173039 0.44240229
## yr_renovated re.region sqft_living15 sqft_lot15 category
## 3 -0.2101235 D 1.0701150 -0.1723713 Economical
## 10 -0.2101235 E 0.5886382 -0.1903905 Slum
## 19 -0.2101235 E -1.3518591 -0.2810359 Slum
## 26 -0.2101235 E -1.3956297 -0.2953195 Slum
## 39 -0.2101235 E -1.0162844 -0.1819669 Economical
## 42 -0.2101235 E 0.6178186 0.6536928 Economical
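Because the price-based classes are imbalanced (Luxury is by far the rarest), it is worth confirming that the random split left similar class proportions in both sets; a quick check:

# class proportions should be roughly equal across the two splits
round(prop.table(table(train$category)), 3)
round(prop.table(table(test$category)), 3)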
library(e1071)
svm.model <- svm(formula = factor(category) ~ ., # the response variable must be a factor
                 data = train)
summary(svm.model) # shows the default SVM parameter settings
##
## Call:
## svm(formula = factor(category) ~ ., data = train)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.04761905
##
## Number of Support Vectors: 10848
##
## ( 4936 2306 3056 550 )
##
##
## Number of Classes: 4
##
## Levels:
## Economical Expensive Luxury Slum
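The summary shows that the default cost (1) and gamma (1 / number of features) were used. If tuning were wanted, e1071's tune.svm() can grid-search both; the grid below is illustrative only, not part of the original analysis:

set.seed(516)
# Illustrative grid search with tune()'s default 10-fold cross-validation
svm.tuned <- tune.svm(factor(category) ~ ., data = train,
                      cost = c(0.1, 1, 10), gamma = c(0.01, 0.05, 0.1))
svm.tuned$best.parameters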
train.pred <- predict(svm.model, train)
table(real=train$category, predict=train.pred)
## predict
## real Economical Expensive Luxury Slum
## Economical 6912 775 5 954
## Expensive 1150 2591 21 11
## Luxury 47 403 106 0
## Slum 956 34 0 3325
# training error
confus.matrix <- table(real = train$category, predict = train.pred)
1 - sum(diag(confus.matrix)) / sum(confus.matrix)
## [1] 0.2519375
# misclassification rate on the training data
test.pred <- predict(svm.model, test)
confus.matrix <- table(real=test$category, predict=test.pred)
confus.matrix
## predict
## real Economical Expensive Luxury Slum
## Economical 1693 224 1 238
## Expensive 306 609 7 1
## Luxury 13 109 30 0
## Slum 264 14 0 814
#testing error
1-sum(diag(confus.matrix))/sum(confus.matrix)
## [1] 0.2722646
library(gbm)
## Loaded gbm 2.1.5
gbm.m <- gbm(category ~ ., data = train, distribution = "multinomial",
             n.trees = 1000, n.minobsinnode = 10, cv.folds = 5)
best.iter <- gbm.perf(gbm.m, method = "cv")   # CV-selected number of trees
print(best.iter)
## [1] 1000
#training error
gbm.predict <- as.data.frame(predict(gbm.m, train))
## Using 1000 trees...
head(gbm.predict)
## Economical.1000 Expensive.1000 Luxury.1000 Slum.1000
## 1 1.3174820 -0.9287836 -5.005131 -2.320543
## 2 1.1466451 -3.2002124 -6.513454 2.272589
## 3 1.9762202 -3.1827462 -6.656051 3.006480
## 4 0.1911357 1.4040416 -2.516033 -4.891224
## 5 1.5081256 -3.9032253 -6.907769 3.428508
## 6 0.8802032 -0.5016033 -4.841015 -2.733155
# map the which.max() column indices back to the class labels
p.gbm.predict <- as.data.frame(apply(gbm.predict, 1, which.max))
colnames(p.gbm.predict) <- "gbm.predict"
p.gbm.predict <- p.gbm.predict %>%
  mutate(gbm.predict = gsub("1", "Economical", gbm.predict)) %>%
  mutate(gbm.predict = gsub("2", "Expensive", gbm.predict)) %>%
  mutate(gbm.predict = gsub("3", "Luxury", gbm.predict)) %>%
  mutate(gbm.predict = gsub("4", "Slum", gbm.predict))
confus.matrix <- table(real = train$category, predict = p.gbm.predict$gbm.predict)
confus.matrix
## predict
## real Economical Expensive Luxury Slum
## Economical 7024 715 14 893
## Expensive 1257 2439 66 11
## Luxury 42 340 170 4
## Slum 910 30 4 3371
error.rate <- 1-sum(diag(confus.matrix))/sum(confus.matrix)
error.rate
## [1] 0.247889
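The gsub() relabeling above relies on the prediction columns being in factor-level order. An alternative sketch reads the winning class name straight off the column names instead:

# strip the ".<n.trees>" suffix from the name of the highest-scoring column
pred.class <- gsub("\\.\\d+$", "",
                   colnames(gbm.predict)[max.col(as.matrix(gbm.predict))])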
#testing error
gbm.predict <- as.data.frame(predict(gbm.m, test))
## Using 1000 trees...
p.gbm.predict <- as.data.frame(apply(gbm.predict, 1, which.max))
colnames(p.gbm.predict) <- "gbm.predict"
p.gbm.predict <- p.gbm.predict %>%
  mutate(gbm.predict = gsub("1", "Economical", gbm.predict)) %>%
  mutate(gbm.predict = gsub("2", "Expensive", gbm.predict)) %>%
  mutate(gbm.predict = gsub("3", "Luxury", gbm.predict)) %>%
  mutate(gbm.predict = gsub("4", "Slum", gbm.predict))
confus.matrix <- table(real = test$category, predict = p.gbm.predict$gbm.predict)
confus.matrix
## predict
## real Economical Expensive Luxury Slum
## Economical 1718 188 8 242
## Expensive 315 582 25 1
## Luxury 15 87 47 3
## Slum 258 15 1 818
error.rate <- 1-sum(diag(confus.matrix))/sum(confus.matrix)
error.rate
## [1] 0.2678695
summary(gbm.m, n.trees = best.iter) # relative importance of each explanatory variable
## var rel.inf
## re.region re.region 53.5978800
## sqft_living sqft_living 12.8116832
## yr_built yr_built 5.2109368
## grade grade 4.5566718
## sqft_living15 sqft_living15 3.4359664
## sqft_lot15 sqft_lot15 3.3855800
## view view 2.7951582
## sqft_above sqft_above 2.7847761
## sqft_lot sqft_lot 2.7205827
## sqft_basement sqft_basement 2.5049506
## waterfront waterfront 1.5983205
## condition condition 1.3503481
## bedrooms bedrooms 0.9257865
## bathrooms bathrooms 0.8880155
## yr_renovated yr_renovated 0.8306173
## floors floors 0.6027263
library(adabag)
## Loading required package: rpart
## Loading required package: caret
## Loading required package: lattice
## Loading required package: foreach
## Loading required package: doParallel
## Loading required package: iterators
## Loading required package: parallel
adaboost.m <- boosting(category ~ ., data = train)
#training error
train.adaboost.predict <-predict(adaboost.m,train)
train.confus.matrix <- table(real = train$category, predict = train.adaboost.predict$class)
train.confus.matrix
## predict
## real Economical Expensive Slum
## Economical 6578 988 1080
## Expensive 1382 2352 39
## Luxury 89 463 4
## Slum 1006 57 3252
train.error.rate <- train.adaboost.predict$error
train.error.rate
## [1] 0.2954309
#testing error
adaboost.predict <- predict(adaboost.m,test)
confus.matrix <- table(real = test$category, predict = adaboost.predict$class)
confus.matrix
## predict
## real Economical Expensive Slum
## Economical 1621 257 278
## Expensive 350 569 4
## Luxury 26 124 2
## Slum 272 23 797
error.rate <- adaboost.predict$error
error.rate
## [1] 0.3090446
# AdaBoost with different numbers of base classifiers (mfinal)
error <- as.numeric()
for (i in 1:60) {
  adaboost_fit <- boosting(category ~ ., data = train, mfinal = i)
  adaboost_predict <- predict.boosting(adaboost_fit, newdata = test)
  error[i] <- adaboost_predict$error
}
plot(error, type = "l")
points(which(error == min(error)), min(error), col = "pink", pch = 19, cex = 1.2)
text(which(error == min(error)) + 5, min(error) + 0.001, round(min(error), 4), cex = 0.7)
abline(h = error.rate, col = "red")   # test error of the default fit (mfinal = 100)
text(5, error.rate, "mfinal = 100", cex = 0.7)
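Refitting boosting() from scratch for every mfinal is slow. adabag's errorevol() produces the same kind of curve from a single fitted ensemble by evaluating it as trees are added; a sketch using the model trained above:

# test error of adaboost.m as the ensemble grows, from one fit
evol.test <- errorevol(adaboost.m, newdata = test)
plot(evol.test$error, type = "l", xlab = "number of trees", ylab = "test error")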
library(Matrix)    # for sparse.model.matrix()
library(xgboost)
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
xgb.train <- sparse.model.matrix(category ~ . - 1, data = train)
xgb.test <- sparse.model.matrix(category ~ . - 1, data = test)
Y <- as.integer(data_2$category) - 1   # xgboost expects 0-based class labels
param <- list("objective" = "multi:softprob",
              "eval_metric" = "mlogloss",
              "num_class" = 4)
cv.model <- xgb.cv(
  params = param,
  data = xgb.train,
  label = Y[train.ind],
  nfold = 5,         # 5-fold CV
  nrounds = 200,     # evaluate models with up to 200 boosting rounds
  # if overfitting already shows up early, there is no point tuning further;
  # stop once the validation loss has not improved for 30 rounds
  early_stopping_rounds = 30,
  print_every_n = 20 # print results only every 20 rounds
)
## [1] train-mlogloss:1.129162+0.001991 test-mlogloss:1.141337+0.003697
## Multiple eval metrics are present. Will use test_mlogloss for early stopping.
## Will train until test_mlogloss hasn't improved in 30 rounds.
##
## [21] train-mlogloss:0.492367+0.003904 test-mlogloss:0.624576+0.013018
## [41] train-mlogloss:0.407712+0.003773 test-mlogloss:0.613407+0.013818
## [61] train-mlogloss:0.353745+0.004061 test-mlogloss:0.614434+0.014775
## Stopping. Best iteration:
## [48] train-mlogloss:0.387375+0.003866 test-mlogloss:0.612533+0.014233
tmp <- cv.model$evaluation_log
plot(x = 1:nrow(tmp), y = tmp$train_mlogloss_mean,
     col = 'red', xlab = "nround", ylab = "mlogloss", main = "Avg.Performance in CV")
points(x = 1:nrow(tmp), y = tmp$test_mlogloss_mean, col = 'blue')
legend("topright", pch = 1, col = c("red", "blue"),
       legend = c("Train", "Validation"))
best.nrounds <- cv.model$best_iteration
best.nrounds
## [1] 48
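The final model below is refit with xgboost() for the CV-chosen number of rounds. An equivalent sketch using xgb.train() with a watchlist would also log the test loss each round (note that the sparse matrix was named xgb.train above, which shadows the function of the same name; R still resolves the call, but a different variable name would be clearer):

# xgb.train() requires xgb.DMatrix inputs for the watchlist
dtrain <- xgb.DMatrix(xgb.train, label = Y[train.ind])
dtest <- xgb.DMatrix(xgb.test, label = Y[-train.ind])
xgb.m2 <- xgb.train(params = param, data = dtrain, nrounds = best.nrounds,
                    watchlist = list(train = dtrain, test = dtest))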
xgb.m <- xgboost(param = param, data = xgb.train, label = Y[train.ind], nrounds = best.nrounds)
## [1] train-mlogloss:1.130233
## [2] train-mlogloss:0.982262
## [3] train-mlogloss:0.878376
## [4] train-mlogloss:0.805090
## [5] train-mlogloss:0.750401
## [6] train-mlogloss:0.709056
## [7] train-mlogloss:0.676003
## [8] train-mlogloss:0.649906
## [9] train-mlogloss:0.627689
## [10] train-mlogloss:0.609096
## [11] train-mlogloss:0.593382
## [12] train-mlogloss:0.579202
## [13] train-mlogloss:0.568102
## [14] train-mlogloss:0.557642
## [15] train-mlogloss:0.547872
## [16] train-mlogloss:0.540220
## [17] train-mlogloss:0.532291
## [18] train-mlogloss:0.524282
## [19] train-mlogloss:0.518107
## [20] train-mlogloss:0.510698
## [21] train-mlogloss:0.506133
## [22] train-mlogloss:0.500622
## [23] train-mlogloss:0.495493
## [24] train-mlogloss:0.490939
## [25] train-mlogloss:0.485078
## [26] train-mlogloss:0.481138
## [27] train-mlogloss:0.475833
## [28] train-mlogloss:0.470954
## [29] train-mlogloss:0.467318
## [30] train-mlogloss:0.463604
## [31] train-mlogloss:0.459022
## [32] train-mlogloss:0.455061
## [33] train-mlogloss:0.452483
## [34] train-mlogloss:0.448760
## [35] train-mlogloss:0.445700
## [36] train-mlogloss:0.441747
## [37] train-mlogloss:0.438776
## [38] train-mlogloss:0.435880
## [39] train-mlogloss:0.433537
## [40] train-mlogloss:0.430380
## [41] train-mlogloss:0.426177
## [42] train-mlogloss:0.422699
## [43] train-mlogloss:0.419195
## [44] train-mlogloss:0.416899
## [45] train-mlogloss:0.413884
## [46] train-mlogloss:0.410645
## [47] train-mlogloss:0.408554
## [48] train-mlogloss:0.405254
xgb.pred.train <- predict(xgb.m, xgb.train)
# reshape the flat probability vector into an n x 4 matrix (one column per class)
xgb.pred.train <- t(matrix(xgb.pred.train, 4, length(xgb.pred.train) / 4))
# take the most probable class and restore the original factor levels
xgb.pred.train <- levels(data_2$category)[max.col(xgb.pred.train)]
xgb.pred.train <- factor(xgb.pred.train, levels = levels(data_2$category))
confusion.matrix.train <- table(real = train$category,predict = xgb.pred.train)
confusion.matrix.train
## predict
## real Economical Expensive Luxury Slum
## Economical 7526 465 2 653
## Expensive 725 3031 11 6
## Luxury 23 128 405 0
## Slum 582 25 0 3708
training.error <- 1-sum(diag(confusion.matrix.train))/sum(confusion.matrix.train)
training.error
## [1] 0.1515327
# same reshaping and relabeling for the test set
xgb.pred.test <- predict(xgb.m, xgb.test)
xgb.pred.test <- t(matrix(xgb.pred.test, 4, length(xgb.pred.test) / 4))
xgb.pred.test <- levels(data_2$category)[max.col(xgb.pred.test)]
xgb.pred.test <- factor(xgb.pred.test, levels = levels(data_2$category))
confusion.matrix.test <- table(real = test$category,predict = xgb.pred.test)
confusion.matrix.test
## predict
## real Economical Expensive Luxury Slum
## Economical 1723 199 8 226
## Expensive 304 585 34 0
## Luxury 12 81 58 1
## Slum 268 16 1 807
testing.error <- 1-sum(diag(confusion.matrix.test))/sum(confusion.matrix.test)
testing.error
## [1] 0.266019
imp <- xgb.importance(xgb.m$feature_names,model=xgb.m)
xgb.plot.importance(imp)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
randomForest.m <- randomForest(category ~ ., data = train,
                               importance = TRUE, proximity = TRUE,
                               ntree = 100, norm.votes = TRUE)
plot(randomForest.m,col=c("pink","blue","red","green","yellow"),
type = "l",lwd = 2)
legend(65,0.65,lty=c(1,2,2,2,2),col = c("pink","blue","red","green","yellow"),
legend = c("OOB",levels(train$category)),cex=0.6 )
tuneRF(train[,-17], train[,17]) # tune mtry; column 17 is category
## mtry = 4 OOB error = 26.69%
## Searching left ...
## mtry = 2 OOB error = 27.24%
## -0.02058505 0.05
## Searching right ...
## mtry = 8 OOB error = 26.82%
## -0.004983749 0.05
## mtry OOBError
## 2.OOB 2 0.2724118
## 4.OOB 4 0.2669173
## 8.OOB 8 0.2682475
rf.model <- randomForest(category ~.,
data = train,
ntree = 60,
mtry = 4)
# in-sample predictions (not OOB), so this training error is optimistic
rf.predict <- predict(rf.model, train)
confusion.matrix.train <- table(real = train$category,predict = rf.predict)
confusion.matrix.train
## predict
## real Economical Expensive Luxury Slum
## Economical 8629 5 1 11
## Expensive 12 3758 0 3
## Luxury 4 4 548 0
## Slum 30 4 0 4281
training.error <- 1-sum(diag(confusion.matrix.train))/sum(confusion.matrix.train)
training.error
## [1] 0.004279931
rf.predict <- predict(rf.model, test)
confusion.matrix.test <- table(real = test$category,predict = rf.predict)
confusion.matrix.test
## predict
## real Economical Expensive Luxury Slum
## Economical 1722 212 3 219
## Expensive 300 606 16 1
## Luxury 15 90 46 1
## Slum 291 13 0 788
testing.error <- 1-sum(diag(confusion.matrix.test))/sum(confusion.matrix.test)
testing.error
## [1] 0.2685635
varImpPlot(rf.model)
# rebuild an all-numeric copy of the data for KNN
data.conclusion <- data[,-c(1,2,3,18,19,22,23,24,25,26,27)]
colnames(data.conclusion)[14] <- "re.region"
data.conclusion[14] <- data_2[14]
# recode the region labels as numbers so they contribute to the KNN distance
data.conclusion <- data.conclusion %>%
mutate(re.region = gsub("A", 3, re.region)) %>%
mutate(re.region = gsub("B", 4, re.region)) %>%
mutate(re.region = gsub("C", 5, re.region)) %>%
mutate(re.region = gsub("D", 2, re.region)) %>%
mutate(re.region = gsub("E", 1, re.region)) %>%
mutate(re.region = gsub("F", 6, re.region))
data.conclusion$re.region <- as.numeric(data.conclusion$re.region)
data.conclusion[,-17] <- scale(data.conclusion[,-17])
train.knn <- data.conclusion[train.ind, ]
test.knn <- data.conclusion[-train.ind, ]
# k = 1 to 100
library(class)   # for knn()
error.rate <- NULL
for (i in 1:100) {
  predicted.knn <- knn(train.knn[,-17], test.knn[,-17], train.knn[,17], k = i)
  error.rate[i] <- mean(test$category != predicted.knn)
}
print(error.rate)
## [1] 0.3687254 0.3698820 0.3400416 0.3351839 0.3354152 0.3310201 0.3189914
## [8] 0.3169096 0.3173722 0.3141337 0.3152903 0.3134397 0.3143650 0.3171409
## [15] 0.3199167 0.3208420 0.3178348 0.3194541 0.3145963 0.3127458 0.3162156
## [22] 0.3159843 0.3099699 0.3099699 0.3088133 0.3099699 0.3106639 0.3115892
## [29] 0.3129771 0.3108952 0.3099699 0.3113579 0.3132084 0.3122831 0.3152903
## [36] 0.3136711 0.3139024 0.3157529 0.3141337 0.3150590 0.3173722 0.3150590
## [43] 0.3141337 0.3148277 0.3152903 0.3136711 0.3148277 0.3129771 0.3143650
## [50] 0.3148277 0.3143650 0.3150590 0.3150590 0.3178348 0.3162156 0.3141337
## [57] 0.3189914 0.3143650 0.3166782 0.3176035 0.3173722 0.3199167 0.3196854
## [64] 0.3217673 0.3217673 0.3229239 0.3210733 0.3210733 0.3217673 0.3194541
## [71] 0.3185288 0.3215360 0.3203794 0.3219986 0.3229239 0.3231552 0.3259311
## [78] 0.3252371 0.3250058 0.3254684 0.3261624 0.3238492 0.3236179 0.3250058
## [85] 0.3250058 0.3266250 0.3266250 0.3261624 0.3275503 0.3273190 0.3263937
## [92] 0.3268563 0.3263937 0.3256997 0.3256997 0.3275503 0.3268563 0.3282443
## [99] 0.3270877 0.3289382
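The best k can also be read off numerically rather than from the plot:

best.k <- which.min(error.rate)   # smallest test error over k = 1..100
best.k
error.rate[best.k]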
plot(x = 1:100, y = error.rate,
     xlab = "k of KNN",
     ylab = "error.rate",
     main = "conclusion plot",
     xlim = c(1, 100), ylim = c(0, max(error.rate)), pch = 16,
     col = 2, cex = 0.5)
text(90,0.35,"knn",cex=0.8,col = "red")
points(which(error.rate == min(error.rate)),min(error.rate),col = "red",pch=1,cex=1.8)
text(which(error.rate == min(error.rate)),min(error.rate)+0.025,"knn.min.error ",cex=0.8,col = "red")
points(15,0.2519375,col = "pink",pch=19,cex=1.2)
points(15,0.2722646,col = "pink",pch=19,cex=1.2)
text(9,0.2519375-0.025,"svm.training.error",cex=0.8,col = "pink")
text(9,0.2722646+0.025,"svm.testing.error",cex=0.8,col = "pink")
points(30,0.247889,col = "blue",pch=19,cex=1.2)
points(30,0.2678695,col = "blue",pch=19,cex=1.2)
text(31,0.247889-0.025,"gbm.training.error",cex=0.8,col = "blue")
text(31,0.2678695+0.025,"gbm.testing.error",cex=0.8,col = "blue")
points(45,0.2954309,col = "orange",pch=19,cex=1.2)
points(45,0.3090446,col = "orange",pch=19,cex=1.2)
text(46,0.2954309-0.025,"ada.training.error",cex=0.8,col = "orange")
text(46,0.3090446+0.025,"ada.testing.error",cex=0.8,col = "orange")
points(60,0.3090446,col = "green",pch=19,cex=1.2)
points(60,0.266019,col = "green",pch=19,cex=1.2)
text(61,0.3090446+0.025,"ada.best.iter.training.error",cex=0.8,col = "green")
text(61,0.266019-0.025,"ada.best.iter.testing.error",cex=0.8,col = "green")
points(75,0.1515327,col = "black",pch=19,cex=1.2)
points(75,0.266019,col = "black",pch=19,cex=1.2)
text(76,0.1515327-0.025,"xgb.cv.training.error",cex=0.8,col = "black")
text(76,0.266019+0.025,"xgb.cv.testing.error",cex=0.8,col = "black")
points(90,0.004279931,col = "darkviolet",pch=19,cex=1.2)
points(90,0.2685635,col = "darkviolet",pch=19,cex=1.2)
text(91,0.004279931+0.025,"rf.best.iter.training.error",cex=0.8,col = "darkviolet")
text(91 ,0.2685635-0.025,"rf.best.iter.testing.error",cex=0.8,col = "darkviolet")