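# Compares bagging, AdaBoost, gradient boosting, kNN and random forest on five
# UCI data sets using 10-fold cross-validation and prints the mean accuracy of
# each method per data set.
# Note: kNN and the gaussian-loss gbm below assume numeric predictors / numeric
# class labels, so data sets with categorical columns (e.g. adult, car) may need
# extra preprocessing before every method runs cleanly.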
# install.packages("rpart.plot", repos="http://c...content-available-to-author-only...o.com/", dependencies=TRUE)
# install.packages('rpart', dependencies=TRUE)
# install.packages('e1071', dependencies=TRUE)
# install.packages('aod', dependencies=TRUE)
# install.packages('ggplot2', dependencies=TRUE)
# install.packages('adabag', dependencies=TRUE)
# install.packages("ada", dependencies=TRUE)
# install.packages('randomForest', dependencies=TRUE)
# install.packages("gbm")

library(rpart)
library(rpart.plot)
library(class)
library(e1071)
library(aod)
library(mlbench)
library(ggplot2)
require(nnet)
library(adabag)
library(ada)
library(randomForest)
library(gbm)

DataSetList <- c('http://a...content-available-to-author-only...i.edu/ml/machine-learning-databases/wine/wine.data',
'http://a...content-available-to-author-only...i.edu/ml/machine-learning-databases/adult/adult.data',
'http://a...content-available-to-author-only...i.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
'http://a...content-available-to-author-only...i.edu/ml/machine-learning-databases/car/car.data',
'http://a...content-available-to-author-only...i.edu/ml/machine-learning-databases/haberman/haberman.data')

classvar <- c(1, 15, 2, 7, 4)

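# classvar[j] is the column index of the class label in data set j; the loop
# below repeats the whole comparison once per data set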
for (j in 1:5)
{

# j <- 1   # uncomment to run only the first data set
d <- read.csv(DataSetList[j], header = FALSE)

d[is.na(d)] <- 0      # treat missing values as 0
d[d == "?"] <- 0      # "?" marks missing values in some UCI files
apply(d, 2, function(x) sum(is.na(x)))   # sanity check: NA count per column

set.seed(100)

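# per-fold accuracy of each method (one entry per cross-validation fold)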
bagging_accuracy <- 1:10
adaboost_accuracy <- 1:10
gboosting_accuracy <- 1:10
forest_accuracy <- 1:10
knn_accuracy <- 1:10

d <- d[sample(nrow(d)), ]
folds <- cut(seq(1, nrow(d)), breaks = 10, labels = FALSE)
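# 10-fold cross-validation: fold i is held out as the test set and the
# remaining nine folds are used for training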
for (i in 1:10)
{
testIndexes <- which(folds == i, arr.ind = TRUE)
testData <- d[testIndexes, ]
trainingData <- d[-testIndexes, ]

trainingData[trainingData == "?"] <- NA
trainingData <- trainingData[complete.cases(trainingData), ]
testData[testData == "?"] <- NA
testData <- testData[complete.cases(testData), ]

Class <- as.factor(trainingData[, classvar[j]])
# the formula interface works most reliably when the response is a column of 'data',
# so attach the class labels to the predictor columns under the name 'Class'
train <- data.frame(trainingData[, -classvar[j]], Class = Class)
test <- data.frame(testData[, -classvar[j]], Class = as.factor(testData[, classvar[j]]))

#-----------Bagging
model <- bagging(Class ~ ., data = train)
p <- predict(model, newdata = test)$class    # predict.bagging returns a list; $class holds the predicted labels
accuracy_bag <- sum(testData[, classvar[j]] == p) / length(p) * 100
bagging_accuracy[i] <- accuracy_bag
#cat("Method = ", "Bagging",", accuracy= ", accuracy_bag,"\n")

#-----------ADA Boosting
model <- boosting(Class ~ ., data = train, boos = TRUE, mfinal = 20, coeflearn = 'Breiman')
p <- predict(model, newdata = test)$class
accuracy_boost <- sum(testData[, classvar[j]] == p) / length(p) * 100
adaboost_accuracy[i] <- accuracy_boost
#cat("Method = ", "Boosting",", accuracy= ", accuracy_boost,"\n")

#------------Gradient Boosting
# as in the original code, the gaussian-loss gbm treats the class label as a numeric
# score and the prediction is rounded back to a label; this only makes sense when
# the class labels are numeric codes
gbm_train <- data.frame(trainingData[, -classvar[j]], Class = trainingData[, classvar[j]])
gbm_fit <- gbm(Class ~ ., data = gbm_train, n.trees = 1000, shrinkage = 0.01,
               distribution = "gaussian", interaction.depth = 7, bag.fraction = 0.9,
               cv.folds = 10, n.minobsinnode = 50)
pred <- predict(gbm_fit, newdata = test, n.trees = 1000, type = "response")
accuracy_gboost <- sum(testData[, classvar[j]] == round(pred)) / length(pred) * 100
gboosting_accuracy[i] <- accuracy_gboost
# cat("Method = ", "Gradient Boosting",", accuracy= ", accuracy_gboost,"\n")

#-----------kNN----------
# class::knn requires purely numeric predictors
predicted_knn <- knn(trainingData[, -classvar[j]], testData[, -classvar[j]], Class, k = 9)
accuracy_knn <- sum(predicted_knn == testData[, classvar[j]]) / length(predicted_knn) * 100
knn_accuracy[i] <- accuracy_knn
# cat("Method = ", "Knn",", accuracy= ", accuracy_knn,"\n")

#-----------Random Forest
fit <- randomForest(Class ~ ., data = train, mtry = 3, importance = TRUE, na.action = na.omit)
pred <- predict(fit, newdata = test, type = "response")
accuracy_forest <- sum(testData[, classvar[j]] == pred) / length(pred) * 100
forest_accuracy[i] <- accuracy_forest
# cat("Method = ", "Random Forest",", accuracy= ", accuracy_forest,"\n")

cat("\n")

}

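# average each method's accuracy over the 10 folds of this data set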
acc_forest <- mean(forest_accuracy)
acc_knn <- mean(knn_accuracy)
acc_gboost <- mean(gboosting_accuracy)
acc_boost <- mean(adaboost_accuracy)
acc_bag <- mean(bagging_accuracy)

  113. cat("Total Accuracy-")
  114. cat("Data Set : ",j,"\n")
  115. cat("Method = ", "Random Forest",", accuracy= ", acc_forest,"\n")
  116. cat("Method = ", "Knn",", accuracy= ", acc_knn,"\n")
  117. cat("Method = ", "Gradient Boosting",", accuracy= ", acc_gboost,"\n")
  118. cat("Method = ", "Ada Boosting",", accuracy= ", acc_boost,"\n")
  119. cat("Method = ", "Bagging",", accuracy= ", acc_bag,"\n")
  120.  
  121. }
  122.  
  123.  
  124.  