library(dplyr) # data function
library(data.table) # data format
library(snow) #paralle computing
library(hydroGOF) #RMSE
set.seed(524287)
Cartype<-'31'
FreewayNum<-'01'
DT
<-fread("C:/Users/lwang2/Documents/R/20160420SpeedCount/01S_20160301_20160331.csv",na.
strings = 'NULL')DT <- DT[,which(unlist(lapply(DT, function(x)!all(is.na(x))))),with=F]
DT<-filter(DT,Category==Cartype)
drops<-c('Date','Category','Hour')
DT[,drops[1:length(drops)]:=NULL]
# replace missing values with Normal distribution Mean(col) sd(col)
#for(j in seq_len(ncol(DT)))
#{
# ##Sys.sleep(0.1)
# set(DT,which(is.na(DT[[j]])),j,abs(rnorm(5000,mean=mean(DT[[j]],na.rm=T),sd=sd(DT[[j]],na.rm = T))))
# ##print(j)
# ##setTxtProgressBar(pb, i)
#}
################################ Replace NA by Each Column Min################################
for(j in seq_len(ncol(DT)))
{
##Sys.sleep(0.1)
set(DT,which(is.na(DT[[j]])),j,min(DT[[j]],na.rm = T))
##print(j)
##setTxtProgressBar(pb, i)
}
TrainDT<-DT[1:round(0.7*nrow(DT),0)]
TrainDT<-select(TrainDT,starts_with(FreewayNum))
PracticeDT<-DT[round(0.7*nrow(DT),0):nrow(DT)]
PracticeDT<-select(PracticeDT,starts_with(FreewayNum))
summaryDF<-data.frame(GantryID=character(),Cor=double(),RMSE=double(),ResidualMin=double(),ResidualQ1=double(),ResidualMedian=double(),ResidualMean=double(),ResidualQ3=double(),ResidualMax=double(),NaNum=double())
#####################Snow Multiple Thread###########################
clusterfun<-function(i){
print(i)
TrainModel<-cbind(setnames(TrainDT[7:nrow(TrainDT),i,with=F],paste0(names(TrainDT[1,i,with=FALSE]),'_y')),TrainDT[1:(nrow(TrainDT)-6),1:length(TrainDT),with=F])
PracticeModel<-cbind(setnames(PracticeDT[7:nrow(PracticeDT),i,with=F],paste0(names(TrainDT[1,i,with=FALSE]),'_y')),PracticeDT[1:(nrow(PracticeDT)-6),1:length(PracticeDT),with=F])
resp<-grep('_y',names(TrainModel),value=T)
pre<-grep(FreewayNum,names(TrainModel),value =T)
pre<-pre[2:length(pre)]
###### Functions ####
remove_outliers<-function(x,na.rm=T)
{
qnt<-quantile(x,probs = c(.25,.75),na.rm = na.rm)
H<-1.5*IQR(x,na.rm = na.rm)
y<-x
y[x<(qnt[1]-H)]<-NA
y[x>(qnt[2]+H)]<-NA
return(y)
}
addq<-function(x) paste0("`",x, "`")
Model<-as.formula(paste(addq(resp),paste(lapply(pre, addq),collapse = '+'),sep = '~'))
FitModel<-lm(Model,data=TrainModel)
#Fitmodel<-lm(`01F0017S_y`~.,data=TrainModel)
#Fitmodel<-lm(as.matrix(TrainDT[7:nrow(TrainDT),i,with=F])~as.matrix(TrainDT[1:(nrow(TrainDT)-6),1:length(TrainDT),with=F]),data=TrainDT)
stepwise<-step(FitModel,sacle=0,direction = 'both')
predictresidual<-PracticeModel[[1]]-predict(stepwise,PracticeModel)
RMSE<-rmse(remove_outliers(predict(stepwise,PracticeModel)),remove_outliers(PracticeModel[[1]]),na.rm = T)
Gantryname<-names(TrainDT[1,i,with=FALSE])
write.csv(stepwise$coefficients,file = paste0(Gantryname,'_Coefficients','.csv'))
write.csv(cbind(TrainModel[[1]],stepwise$fitted.values,stepwise$residuals),file = paste0(Gantryname,'_Residual','.csv'))
write.csv(cbind(PracticeModel[[1]],predict(stepwise,PracticeModel),predictresidual),file = paste0(Gantryname,'_Predict','.csv'))
SumResidual<-summary(remove_outliers(predictresidual))
unlistResidual<-data.frame(matrix(unlist(SumResidual),ncol = 7,byrow = T))
summaryDF<-rbind(summaryDF,cbind(Gantryname,cor(PracticeModel[[1]],predict(stepwise,PracticeModel)),RMSE,unlistResidual))
##summaryTest<-rbind(summaryTest,cbind(data.frame(t(as.matrix(unlist(stepwise$coefficients),ncol=length(stepwise$coefficients),byrow=T)))))
#PredictData<-predict(stepwise,PracticeDT)
if (i ==3)({
colnames(summaryDF)<-c('GantryID','Cor','RMSE','ResidualMin','ResidualQ1','ResidualMedian','ResidualMean','ResidualQ3','ResidualMax','NaNum')
write.csv(summaryDF,file = 'Summary.csv')
})
}
cluster <- makeCluster(type="SOCK",c("localhost", "localhost", "localhost", "localhost"))
clusterEvalQ(cluster,c(library(data.table),library(hydroGOF)))
clusterExport(cluster,c('TrainDT','DT','PracticeDT','FreewayNum','summaryDF'))
stopCluster(cluster)
bGlicmFyeShkcGx5cikgICMgZGF0YSBmdW5jdGlvbgpsaWJyYXJ5KGRhdGEudGFibGUpICMgZGF0YSBmb3JtYXQKbGlicmFyeShzbm93KSAgI3BhcmFsbGUgY29tcHV0aW5nCmxpYnJhcnkoaHlkcm9HT0YpICAjUk1TRQoKCnNldC5zZWVkKDUyNDI4NykKCkNhcnR5cGU8LSczMScKRnJlZXdheU51bTwtJzAxJwpEVDwtZnJlYWQoIkM6L1VzZXJzL2x3YW5nMi9Eb2N1bWVudHMvUi8yMDE2MDQyMFNwZWVkQ291bnQvMDFTXzIwMTYwMzAxXzIwMTYwMzMxLmNzdiIsbmEuc3RyaW5ncyA9ICdOVUxMJykKRFQgPC0gRFRbLHdoaWNoKHVubGlzdChsYXBwbHkoRFQsIGZ1bmN0aW9uKHgpIWFsbChpcy5uYSh4KSkpKSksd2l0aD1GXQpEVDwtZmlsdGVyKERULENhdGVnb3J5PT1DYXJ0eXBlKQoKZHJvcHM8LWMoJ0RhdGUnLCdDYXRlZ29yeScsJ0hvdXInKQpEVFssZHJvcHNbMTpsZW5ndGgoZHJvcHMpXTo9TlVMTF0KCgojIHJlcGxhY2UgbWlzc2luZyB2YWx1ZXMgd2l0aCBOb3JtYWwgZGlzdHJpYnV0aW9uIE1lYW4oY29sKSBzZChjb2wpCgojZm9yKGogaW4gc2VxX2xlbihuY29sKERUKSkpCiN7CiMgICMjU3lzLnNsZWVwKDAuMSkKIyAgc2V0KERULHdoaWNoKGlzLm5hKERUW1tqXV0pKSxqLGFicyhybm9ybSg1MDAwLG1lYW49bWVhbihEVFtbal1dLG5hLnJtPVQpLHNkPXNkKERUW1tqXV0sbmEucm0gPSBUKSkpKQojICAjI3ByaW50KGopIAojICAjI3NldFR4dFByb2dyZXNzQmFyKHBiLCBpKQojfQoKIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgUmVwbGFjZSBOQSBieSBFYWNoIENvbHVtbiBNaW4jIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyAKZm9yKGogaW4gc2VxX2xlbihuY29sKERUKSkpCnsKICAjI1N5cy5zbGVlcCgwLjEpCiAgc2V0KERULHdoaWNoKGlzLm5hKERUW1tqXV0pKSxqLG1pbihEVFtbal1dLG5hLnJtID0gVCkpCiAgIyNwcmludChqKSAKICAjI3NldFR4dFByb2dyZXNzQmFyKHBiLCBpKQp9CgoKVHJhaW5EVDwtRFRbMTpyb3VuZCgwLjcqbnJvdyhEVCksMCldClRyYWluRFQ8LXNlbGVjdChUcmFpbkRULHN0YXJ0c193aXRoKEZyZWV3YXlOdW0pKQoKUHJhY3RpY2VEVDwtRFRbcm91bmQoMC43Km5yb3coRFQpLDApOm5yb3coRFQpXQpQcmFjdGljZURUPC1zZWxlY3QoUHJhY3RpY2VEVCxzdGFydHNfd2l0aChGcmVld2F5TnVtKSkKCnN1bW1hcnlERjwtZGF0YS5mcmFtZShHYW50cnlJRD1jaGFyYWN0ZXIoKSxDb3I9ZG91YmxlKCksUk1TRT1kb3VibGUoKSxSZXNpZHVhbE1pbj1kb3VibGUoKSxSZXNpZHVhbFExPWRvdWJsZSgpLFJlc2lkdWFsTWVkaWFuPWRvdWJsZSgpLFJlc2lkdWFsTWVhbj1kb3VibGUoKSxSZXNpZHVhbFEzPWRvdWJsZSgpLFJlc2lkdWFsTWF4PWRvdWJsZSgpLE5hTnVtPWRvdWJsZSgpKQoKCgojIyMjIyMjIyMjIyMjIyMjIyMjIyNTbm93IE11bHRpcGxlIFRocmVhZCMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwpjbHVzdGVyZnVuPC1mdW5jdGlvbihpKXsKICBwcmludChpKQogIFRyYWluTW9kZWw8LWNiaW5kKHNldG5hbWVzKFRyYWluRFRbNzpucm93KFRyYWluRFQpLGksd2l0aD1GXSxwYXN0ZTAobmFtZXMoVHJhaW5EVFsxLGksd2l0aD1GQUxTRV0pLCdfeScpKSxUcmFpbkRUWzE6KG5yb3coVHJhaW5EVCktNiksMTpsZW5ndGgoVHJhaW5EVCksd2l0aD1GXSkKICBQcmFjdGljZU1vZGVsPC1jYmluZChzZXRuYW1lcyhQcmFjdGljZURUWzc6bnJvdyhQcmFjdGljZURUKSxpLHdpdGg9Rl0scGFzdGUwKG5hbWVzKFRyYWluRFRbMSxpLHdpdGg9RkFMU0VdKSwnX3knKSksUHJhY3RpY2VEVFsxOihucm93KFByYWN0aWNlRFQpLTYpLDE6bGVuZ3RoKFByYWN0aWNlRFQpLHdpdGg9Rl0pCiAgcmVzcDwtZ3JlcCgnX3knLG5hbWVzKFRyYWluTW9kZWwpLHZhbHVlPVQpCiAgcHJlPC1ncmVwKEZyZWV3YXlOdW0sbmFtZXMoVHJhaW5Nb2RlbCksdmFsdWUgPVQpCiAgcHJlPC1wcmVbMjpsZW5ndGgocHJlKV0KICAKICAjIyMjIyMgIEZ1bmN0aW9ucyAjIyMjCiAgcmVtb3ZlX291dGxpZXJzPC1mdW5jdGlvbih4LG5hLnJtPVQpCiAgewogICAgcW50PC1xdWFudGlsZSh4LHByb2JzID0gYyguMjUsLjc1KSxuYS5ybSA9IG5hLnJtKQogICAgSDwtMS41KklRUih4LG5hLnJtID0gbmEucm0pCiAgICB5PC14CiAgICB5W3g8KHFudFsxXS1IKV08LU5BCiAgICB5W3g+KHFudFsyXStIKV08LU5BCiAgICByZXR1cm4oeSkKICB9CiAgYWRkcTwtZnVuY3Rpb24oeCkgcGFzdGUwKCJgIix4LCAiYCIpCiAgCiAgCiAgCiAgTW9kZWw8LWFzLmZvcm11bGEocGFzdGUoYWRkcShyZXNwKSxwYXN0ZShsYXBwbHkocHJlLCBhZGRxKSxjb2xsYXBzZSA9ICcrJyksc2VwID0gJ34nKSkKICAKICBGaXRNb2RlbDwtbG0oTW9kZWwsZGF0YT1UcmFpbk1vZGVsKQogICNGaXRtb2RlbDwtbG0oYDAxRjAwMTdTX3lgfi4sZGF0YT1UcmFpbk1vZGVsKQogICNGaXRtb2RlbDwtbG0oYXMubWF0cml4KFRyYWluRFRbNzpucm93KFRyYWluRFQpLGksd2l0aD1GXSl+YXMubWF0cml4KFRyYWluRFRbMToobnJvdyhUcmFpbkRUKS02KSwxOmxlbmd0aChUcmFpbkRUKSx3aXRoPUZdKSxkYXRhPVRyYWluRFQpCiAgc3RlcHdpc2U8LXN0ZXAoRml0TW9kZWwsc2FjbGU9MCxkaXJlY3Rpb24gPSAnYm90aCcpCiAgcHJlZGljdHJlc2lkdWFsPC1QcmFjdGljZU1vZGVsW1sxXV0tcHJlZGljdChzdGVwd2lzZSxQcmFjdGljZU1vZGVsKQogIFJNU0U8LXJtc2UocmVtb3ZlX291dGxpZXJzKHByZWRpY3Qoc3RlcHdpc2UsUHJhY3RpY2VNb2RlbCkpLHJlbW92ZV9vdXRsaWVycyhQcmFjdGljZU1vZGVsW1sxXV0pLG5hLnJtID0gVCkKICBHYW50cnluYW1lPC1uYW1lcyhUcmFpbkRUWzEsaSx3aXRoPUZBTFNFXSkKICB3cml0ZS5jc3Yoc3RlcHdpc2UkY29lZmZpY2llbnRzLGZpbGUgPSBwYXN0ZTAoR2FudHJ5bmFtZSwnX0NvZWZmaWNpZW50cycsJy5jc3YnKSkKICB3cml0ZS5jc3YoY2JpbmQoVHJhaW5Nb2RlbFtbMV1dLHN0ZXB3aXNlJGZpdHRlZC52YWx1ZXMsc3RlcHdpc2UkcmVzaWR1YWxzKSxmaWxlID0gcGFzdGUwKEdhbnRyeW5hbWUsJ19SZXNpZHVhbCcsJy5jc3YnKSkKICB3cml0ZS5jc3YoY2JpbmQoUHJhY3RpY2VNb2RlbFtbMV1dLHByZWRpY3Qoc3RlcHdpc2UsUHJhY3RpY2VNb2RlbCkscHJlZGljdHJlc2lkdWFsKSxmaWxlID0gcGFzdGUwKEdhbnRyeW5hbWUsJ19QcmVkaWN0JywnLmNzdicpKQogIFN1bVJlc2lkdWFsPC1zdW1tYXJ5KHJlbW92ZV9vdXRsaWVycyhwcmVkaWN0cmVzaWR1YWwpKQogIHVubGlzdFJlc2lkdWFsPC1kYXRhLmZyYW1lKG1hdHJpeCh1bmxpc3QoU3VtUmVzaWR1YWwpLG5jb2wgPSA3LGJ5cm93ID0gVCkpCiAgc3VtbWFyeURGPC1yYmluZChzdW1tYXJ5REYsY2JpbmQoR2FudHJ5bmFtZSxjb3IoUHJhY3RpY2VNb2RlbFtbMV1dLHByZWRpY3Qoc3RlcHdpc2UsUHJhY3RpY2VNb2RlbCkpLFJNU0UsdW5saXN0UmVzaWR1YWwpKQogICMjc3VtbWFyeVRlc3Q8LXJiaW5kKHN1bW1hcnlUZXN0LGNiaW5kKGRhdGEuZnJhbWUodChhcy5tYXRyaXgodW5saXN0KHN0ZXB3aXNlJGNvZWZmaWNpZW50cyksbmNvbD1sZW5ndGgoc3RlcHdpc2UkY29lZmZpY2llbnRzKSxieXJvdz1UKSkpKSkKICAKICAjUHJlZGljdERhdGE8LXByZWRpY3Qoc3RlcHdpc2UsUHJhY3RpY2VEVCkKICBpZiAoaSA9PTMpKHsKICAgIGNvbG5hbWVzKHN1bW1hcnlERik8LWMoJ0dhbnRyeUlEJywnQ29yJywnUk1TRScsJ1Jlc2lkdWFsTWluJywnUmVzaWR1YWxRMScsJ1Jlc2lkdWFsTWVkaWFuJywnUmVzaWR1YWxNZWFuJywnUmVzaWR1YWxRMycsJ1Jlc2lkdWFsTWF4JywnTmFOdW0nKQogICAgd3JpdGUuY3N2KHN1bW1hcnlERixmaWxlID0gJ1N1bW1hcnkuY3N2JykKICB9KQogIAp9CgpjbHVzdGVyIDwtIG1ha2VDbHVzdGVyKHR5cGU9IlNPQ0siLGMoImxvY2FsaG9zdCIsICJsb2NhbGhvc3QiLCAibG9jYWxob3N0IiwgImxvY2FsaG9zdCIpKQoKY2x1c3RlckV2YWxRKGNsdXN0ZXIsYyhsaWJyYXJ5KGRhdGEudGFibGUpLGxpYnJhcnkoaHlkcm9HT0YpKSkKY2x1c3RlckV4cG9ydChjbHVzdGVyLGMoJ1RyYWluRFQnLCdEVCcsJ1ByYWN0aWNlRFQnLCdGcmVld2F5TnVtJywnc3VtbWFyeURGJykpCnN5c3RlbS50aW1lKHBhckxhcHBseShjbHVzdGVyLDE6MyxjbHVzdGVyZnVuKSkKCnN0b3BDbHVzdGVyKGNsdXN0ZXIpCgo=