# fork download  (appears to be toolbar text from the paste site; not part of the script)
  library(dplyr) # data functions
  library(data.table) # data format
  library(snow) # parallel computing
  library(hydroGOF) # RMSE

  # Load one month of per-gantry records, keep a single vehicle category,
  # fill gaps, and split the rows chronologically into a training part (first
  # 70%) and a hold-out "practice" part (remaining rows).

  # The seed only matters if the commented-out rnorm() imputation below is
  # re-enabled; nothing else in this section draws random numbers.
  set.seed(524287)

  Cartype <- "31"
  FreewayNum <- "01"
  DT <- fread(
    "C:/Users/lwang2/Documents/R/20160420SpeedCount/01S_20160301_20160331.csv",
    na.strings = "NULL"
  )

  # Drop columns that contain no data at all.
  has_data <- vapply(DT, function(x) !all(is.na(x)), logical(1))
  DT <- DT[, which(has_data), with = FALSE]

  # Keep only the selected vehicle category. Subsetting with data.table itself
  # (rather than dplyr::filter) keeps DT a fully valid data.table, so the
  # by-reference `:=` below does not hit an invalid internal self-reference.
  # as.character() reproduces the number-to-string coercion that
  # `Category == "31"` performs when the column was read as numeric.
  DT <- DT[as.character(Category) == Cartype]

  drops <- c("Date", "Category", "Hour")
  DT[, (drops) := NULL]

  # Alternative imputation (disabled): replace missing values with draws from
  # a normal distribution using the column mean and sd.
  # for (j in seq_len(ncol(DT))) {
  #   set(DT, which(is.na(DT[[j]])), j,
  #       abs(rnorm(5000, mean = mean(DT[[j]], na.rm = TRUE),
  #                 sd = sd(DT[[j]], na.rm = TRUE))))
  # }

  ################ Replace NA by each column's minimum ################
  for (j in seq_len(ncol(DT))) {
    missing_rows <- which(is.na(DT[[j]]))
    if (length(missing_rows) > 0L) {
      set(DT, missing_rows, j, min(DT[[j]], na.rm = TRUE))
    }
  }

  # Chronological 70/30 split. The hold-out part starts on the row AFTER the
  # split row, so no observation is shared between training and hold-out data.
  split_row <- round(0.7 * nrow(DT), 0)
  row_id <- seq_len(nrow(DT))

  TrainDT <- DT[row_id <= split_row]
  TrainDT <- select(TrainDT, starts_with(FreewayNum))

  PracticeDT <- DT[row_id > split_row]
  PracticeDT <- select(PracticeDT, starts_with(FreewayNum))

  # Empty accumulator for one summary row per modelled gantry.
  summaryDF <- data.frame(
    GantryID = character(),
    Cor = double(),
    RMSE = double(),
    ResidualMin = double(),
    ResidualQ1 = double(),
    ResidualMedian = double(),
    ResidualMean = double(),
    ResidualQ3 = double(),
    ResidualMax = double(),
    NaNum = double()
  )

  ####### Functions: remove_outliers & addq ####################################

  #' Blank out values outside the 1.5 * IQR fences.
  #'
  #' @param x Numeric vector.
  #' @param na.rm Passed to `quantile()` and `IQR()`; when `TRUE` (default)
  #'   missing values in `x` are ignored while computing the fences.
  #' @return A vector like `x` in which every value below `Q1 - 1.5 * IQR` or
  #'   above `Q3 + 1.5 * IQR` is replaced by `NA`. Values that were already
  #'   `NA` stay `NA`.
  remove_outliers <- function(x, na.rm = TRUE) {
    qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, names = FALSE)
    fence <- 1.5 * IQR(x, na.rm = na.rm)
    # which() drops NA comparisons, so existing NAs never enter the index.
    outside <- which(x < (qnt[1] - fence) | x > (qnt[2] + fence))
    y <- x
    y[outside] <- NA
    y
  }
  #' Wrap names in backticks so non-syntactic column names (for example ones
  #' starting with a digit) can be pasted into a formula string.
  #'
  #' @param x Character vector of names.
  #' @return Character vector with each element surrounded by backticks.
  addq <- function(x) {
    paste0("`", x, "`")
  }
  ## Test <- data.frame()
  ## GantryTestName <- data.frame(Intercept = numeric(), cbind(Test, t(as.matrix(lapply(names(TrainDT), addq)))))

  ###### Model with a 30-minute shift: y at 8:30 is regressed on x at 8:00

  # Build a modelling table from `dt`: column `col_idx` taken `lag` rows later
  # (renamed to `y_name`) bound to every column of `dt` at the unshifted rows.
  make_lagged_model_data <- function(dt, col_idx, y_name, lag) {
    n <- nrow(dt)
    stopifnot(n > lag)
    response <- dt[(lag + 1L):n, col_idx, with = FALSE]
    setnames(response, y_name)
    cbind(response, dt[seq_len(n - lag)])
  }

  # NOTE(review): 6 rows = 30 minutes presumes 5-minute records -- confirm.
  lag_rows <- 6L
  # Only the first two gantry columns are modelled.
  target_cols <- 1:2
  summary_rows <- vector("list", length(target_cols))

  system.time({
    for (k in seq_along(target_cols)) {
      i <- target_cols[k]
      print(i)

      Gantryname <- names(TrainDT)[i]
      resp <- paste0(Gantryname, "_y")

      TrainModel <- make_lagged_model_data(TrainDT, i, resp, lag_rows)
      PracticeModel <- make_lagged_model_data(PracticeDT, i, resp, lag_rows)

      # Every unshifted column is a candidate predictor; names are backticked
      # because gantry IDs are not syntactic R names.
      pre <- setdiff(names(TrainModel), resp)
      Model <- as.formula(
        paste(addq(resp), paste(addq(pre), collapse = "+"), sep = "~")
      )

      FitModel <- lm(Model, data = TrainModel)
      # `scale` was previously misspelled and therefore silently ignored.
      stepwise <- step(FitModel, scale = 0, direction = "both")

      predicted <- predict(stepwise, PracticeModel)
      predictresidual <- PracticeModel[[1]] - predicted
      RMSE <- rmse(
        remove_outliers(predicted),
        remove_outliers(PracticeModel[[1]]),
        na.rm = TRUE
      )

      write.csv(
        stepwise$coefficients,
        file = paste0(Gantryname, "_Coefficients", ".csv")
      )
      write.csv(
        cbind(TrainModel[[1]], stepwise$fitted.values, stepwise$residuals),
        file = paste0(Gantryname, "_Residual", ".csv")
      )
      # predict() is written as a call here (not the `predicted` variable) so
      # cbind() derives the same column names as before for the CSV header.
      write.csv(
        cbind(
          PracticeModel[[1]],
          predict(stepwise, PracticeModel),
          predictresidual
        ),
        file = paste0(Gantryname, "_Predict", ".csv")
      )

      # Residual statistics are computed explicitly. summary() only includes
      # an NA count when NAs exist, so reshaping its output into 7 columns
      # recycled the minimum into NaNum whenever no outlier was removed.
      trimmed <- remove_outliers(predictresidual)
      qs <- quantile(
        trimmed,
        probs = c(0, 0.25, 0.5, 0.75, 1),
        na.rm = TRUE,
        names = FALSE
      )
      summary_rows[[k]] <- data.frame(
        GantryID = Gantryname,
        Cor = cor(PracticeModel[[1]], predicted),
        RMSE = RMSE,
        ResidualMin = qs[1],
        ResidualQ1 = qs[2],
        ResidualMedian = qs[3],
        ResidualMean = mean(trimmed, na.rm = TRUE),
        ResidualQ3 = qs[4],
        ResidualMax = qs[5],
        NaNum = sum(is.na(trimmed)),
        stringsAsFactors = FALSE
      )
    }

    # Written once after all gantries are done, independent of how many
    # columns `target_cols` holds.
    summaryDF <- rbind(summaryDF, do.call(rbind, summary_rows))
    write.csv(summaryDF, file = "Summary.csv")
  })
# ---- Captured output from the online runner (not part of the script) ----
# Status: Success (0.48s, 79168KB)
# stdin:  Standard input is empty
# stdout: Standard output is empty
# stderr:
#   Error in library(dplyr) : there is no package called ‘dplyr’
#   Execution halted