library(dplyr) # data function
library(data.table) # data format
library(snow) #paralle computing
library(hydroGOF) #RMSE
set.seed(524287)
Cartype<-'31'
FreewayNum<-'01'
DT
<-fread("C:/Users/lwang2/Documents/R/20160420SpeedCount/01S_20160301_20160331.csv",na.
strings = 'NULL')DT <- DT[,which(unlist(lapply(DT, function(x)!all(is.na(x))))),with=F]
DT<-filter(DT,Category==Cartype)
drops<-c('Date','Category','Hour')
DT[,drops[1:length(drops)]:=NULL]
# replace missing values with Normal distribution Mean(col) sd(col)
#for(j in seq_len(ncol(DT)))
#{
# ##Sys.sleep(0.1)
# set(DT,which(is.na(DT[[j]])),j,abs(rnorm(5000,mean=mean(DT[[j]],na.rm=T),sd=sd(DT[[j]],na.rm = T))))
# ##print(j)
# ##setTxtProgressBar(pb, i)
#}
################################ Replace NA by Each Column Min################################
for(j in seq_len(ncol(DT)))
{
##Sys.sleep(0.1)
set(DT,which(is.na(DT[[j]])),j,min(DT[[j]],na.rm = T))
##print(j)
##setTxtProgressBar(pb, i)
}
TrainDT<-DT[1:round(0.7*nrow(DT),0)]
TrainDT<-select(TrainDT,starts_with(FreewayNum))
PracticeDT<-DT[round(0.7*nrow(DT),0):nrow(DT)]
PracticeDT<-select(PracticeDT,starts_with(FreewayNum))
summaryDF<-data.frame(GantryID=character(),Cor=double(),RMSE=double(),ResidualMin=double(),ResidualQ1=double(),ResidualMedian=double(),ResidualMean=double(),ResidualQ3=double(),ResidualMax=double(),NaNum=double())
#######Function Here: Remove outlier & addq####################################
remove_outliers<-function(x,na.rm=T)
{
qnt<-quantile(x,probs = c(.25,.75),na.rm = na.rm)
H<-1.5*IQR(x,na.rm = na.rm)
y<-x
y[x<(qnt[1]-H)]<-NA
y[x>(qnt[2]+H)]<-NA
return(y)
}
addq<-function(x) paste0("`",x, "`")
##Test<-data.frame()
##GantryTestName<-data.frame(Intercept=numeric(),cbind(Test,t(as.matrix(lapply(names(TrainDT),addq)))))
######Model with shift 30 Mins: y8:30=x8:00
{
print(i)
TrainModel<-cbind(setnames(TrainDT[7:nrow(TrainDT),i,with=F],paste0(names(TrainDT[1,i,with=FALSE]),'_y')),TrainDT[1:(nrow(TrainDT)-6),1:length(TrainDT),with=F])
PracticeModel<-cbind(setnames(PracticeDT[7:nrow(PracticeDT),i,with=F],paste0(names(TrainDT[1,i,with=FALSE]),'_y')),PracticeDT[1:(nrow(PracticeDT)-6),1:length(PracticeDT),with=F])
resp<-grep('_y',names(TrainModel),value=T)
pre<-grep(FreewayNum,names(TrainModel),value =T)
pre<-pre[2:length(pre)]
Model<-as.formula(paste(addq(resp),paste(lapply(pre, addq),collapse = '+'),sep = '~'))
FitModel<-lm(Model,data=TrainModel)
#Fitmodel<-lm(`01F0017S_y`~.,data=TrainModel)
#Fitmodel<-lm(as.matrix(TrainDT[7:nrow(TrainDT),i,with=F])~as.matrix(TrainDT[1:(nrow(TrainDT)-6),1:length(TrainDT),with=F]),data=TrainDT)
stepwise<-step(FitModel,sacle=0,direction = 'both')
predictresidual<-PracticeModel[[1]]-predict(stepwise,PracticeModel)
RMSE<-rmse(remove_outliers(predict(stepwise,PracticeModel)),remove_outliers(PracticeModel[[1]]),na.rm = T)
Gantryname<-names(TrainDT[1,i,with=FALSE])
write.csv(stepwise$coefficients,file = paste0(Gantryname,'_Coefficients','.csv'))
write.csv(cbind(TrainModel[[1]],stepwise$fitted.values,stepwise$residuals),file = paste0(Gantryname,'_Residual','.csv'))
write.csv(cbind(PracticeModel[[1]],predict(stepwise,PracticeModel),predictresidual),file = paste0(Gantryname,'_Predict','.csv'))
SumResidual<-summary(remove_outliers(predictresidual))
unlistResidual<-data.frame(matrix(unlist(SumResidual),ncol = 7,byrow = T))
summaryDF<-rbind(summaryDF,cbind(Gantryname,cor(PracticeModel[[1]],predict(stepwise,PracticeModel)),RMSE,unlistResidual))
##summaryTest<-rbind(summaryTest,cbind(data.frame(t(as.matrix(unlist(stepwise$coefficients),ncol=length(stepwise$coefficients),byrow=T)))))
#PredictData<-predict(stepwise,PracticeDT)
if (i ==2)({
colnames(summaryDF)<-c('GantryID','Cor','RMSE','ResidualMin','ResidualQ1','ResidualMedian','ResidualMean','ResidualQ3','ResidualMax','NaNum')
write.csv(summaryDF,file = 'Summary.csv')
})
})
bGlicmFyeShkcGx5cikgICMgZGF0YSBmdW5jdGlvbgpsaWJyYXJ5KGRhdGEudGFibGUpICMgZGF0YSBmb3JtYXQKbGlicmFyeShzbm93KSAgI3BhcmFsbGUgY29tcHV0aW5nCmxpYnJhcnkoaHlkcm9HT0YpICAjUk1TRQoKCnNldC5zZWVkKDUyNDI4NykKCkNhcnR5cGU8LSczMScKRnJlZXdheU51bTwtJzAxJwpEVDwtZnJlYWQoIkM6L1VzZXJzL2x3YW5nMi9Eb2N1bWVudHMvUi8yMDE2MDQyMFNwZWVkQ291bnQvMDFTXzIwMTYwMzAxXzIwMTYwMzMxLmNzdiIsbmEuc3RyaW5ncyA9ICdOVUxMJykKRFQgPC0gRFRbLHdoaWNoKHVubGlzdChsYXBwbHkoRFQsIGZ1bmN0aW9uKHgpIWFsbChpcy5uYSh4KSkpKSksd2l0aD1GXQpEVDwtZmlsdGVyKERULENhdGVnb3J5PT1DYXJ0eXBlKQoKZHJvcHM8LWMoJ0RhdGUnLCdDYXRlZ29yeScsJ0hvdXInKQpEVFssZHJvcHNbMTpsZW5ndGgoZHJvcHMpXTo9TlVMTF0KCgojIHJlcGxhY2UgbWlzc2luZyB2YWx1ZXMgd2l0aCBOb3JtYWwgZGlzdHJpYnV0aW9uIE1lYW4oY29sKSBzZChjb2wpCgojZm9yKGogaW4gc2VxX2xlbihuY29sKERUKSkpCiN7CiMgICMjU3lzLnNsZWVwKDAuMSkKIyAgc2V0KERULHdoaWNoKGlzLm5hKERUW1tqXV0pKSxqLGFicyhybm9ybSg1MDAwLG1lYW49bWVhbihEVFtbal1dLG5hLnJtPVQpLHNkPXNkKERUW1tqXV0sbmEucm0gPSBUKSkpKQojICAjI3ByaW50KGopIAojICAjI3NldFR4dFByb2dyZXNzQmFyKHBiLCBpKQojfQoKIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgUmVwbGFjZSBOQSBieSBFYWNoIENvbHVtbiBNaW4jIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyAKZm9yKGogaW4gc2VxX2xlbihuY29sKERUKSkpCnsKICAjI1N5cy5zbGVlcCgwLjEpCiAgc2V0KERULHdoaWNoKGlzLm5hKERUW1tqXV0pKSxqLG1pbihEVFtbal1dLG5hLnJtID0gVCkpCiAgIyNwcmludChqKSAKICAjI3NldFR4dFByb2dyZXNzQmFyKHBiLCBpKQp9CgoKVHJhaW5EVDwtRFRbMTpyb3VuZCgwLjcqbnJvdyhEVCksMCldClRyYWluRFQ8LXNlbGVjdChUcmFpbkRULHN0YXJ0c193aXRoKEZyZWV3YXlOdW0pKQoKUHJhY3RpY2VEVDwtRFRbcm91bmQoMC43Km5yb3coRFQpLDApOm5yb3coRFQpXQpQcmFjdGljZURUPC1zZWxlY3QoUHJhY3RpY2VEVCxzdGFydHNfd2l0aChGcmVld2F5TnVtKSkKCnN1bW1hcnlERjwtZGF0YS5mcmFtZShHYW50cnlJRD1jaGFyYWN0ZXIoKSxDb3I9ZG91YmxlKCksUk1TRT1kb3VibGUoKSxSZXNpZHVhbE1pbj1kb3VibGUoKSxSZXNpZHVhbFExPWRvdWJsZSgpLFJlc2lkdWFsTWVkaWFuPWRvdWJsZSgpLFJlc2lkdWFsTWVhbj1kb3VibGUoKSxSZXNpZHVhbFEzPWRvdWJsZSgpLFJlc2lkdWFsTWF4PWRvdWJsZSgpLE5hTnVtPWRvdWJsZSgpKQoKCgojIyMjIyMjRnVuY3Rpb24gSGVyZTogUmVtb3ZlIG91dGxpZXIgJiBhZGRxIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCgpyZW1vdmVfb3V0bGllcnM8LWZ1bmN0aW9uKHgsbmEucm09VCkKewpxbnQ8LXF1YW50aWxlKHgscHJvYnMgPSBjKC4yNSwuNzUpLG5hLnJtID0gbmEucm0pCkg8LTEuNSpJUVIoeCxuYS5ybSA9IG5hLnJtKQp5PC14CnlbeDwocW50WzFdLUgpXTwtTkEKeVt4PihxbnRbMl0rSCldPC1OQQpyZXR1cm4oeSkKfQphZGRxPC1mdW5jdGlvbih4KSBwYXN0ZTAoImAiLHgsICJgIikKIyNUZXN0PC1kYXRhLmZyYW1lKCkKIyNHYW50cnlUZXN0TmFtZTwtZGF0YS5mcmFtZShJbnRlcmNlcHQ9bnVtZXJpYygpLGNiaW5kKFRlc3QsdChhcy5tYXRyaXgobGFwcGx5KG5hbWVzKFRyYWluRFQpLGFkZHEpKSkpKQoKCgoKCiMjIyMjI01vZGVsIHdpdGggc2hpZnQgMzAgTWluczogeTg6MzA9eDg6MDAKc3lzdGVtLnRpbWUoZm9yIChpIGluIDE6MikKewogIHByaW50KGkpCiAgVHJhaW5Nb2RlbDwtY2JpbmQoc2V0bmFtZXMoVHJhaW5EVFs3Om5yb3coVHJhaW5EVCksaSx3aXRoPUZdLHBhc3RlMChuYW1lcyhUcmFpbkRUWzEsaSx3aXRoPUZBTFNFXSksJ195JykpLFRyYWluRFRbMToobnJvdyhUcmFpbkRUKS02KSwxOmxlbmd0aChUcmFpbkRUKSx3aXRoPUZdKQogIFByYWN0aWNlTW9kZWw8LWNiaW5kKHNldG5hbWVzKFByYWN0aWNlRFRbNzpucm93KFByYWN0aWNlRFQpLGksd2l0aD1GXSxwYXN0ZTAobmFtZXMoVHJhaW5EVFsxLGksd2l0aD1GQUxTRV0pLCdfeScpKSxQcmFjdGljZURUWzE6KG5yb3coUHJhY3RpY2VEVCktNiksMTpsZW5ndGgoUHJhY3RpY2VEVCksd2l0aD1GXSkKICByZXNwPC1ncmVwKCdfeScsbmFtZXMoVHJhaW5Nb2RlbCksdmFsdWU9VCkKICBwcmU8LWdyZXAoRnJlZXdheU51bSxuYW1lcyhUcmFpbk1vZGVsKSx2YWx1ZSA9VCkKICBwcmU8LXByZVsyOmxlbmd0aChwcmUpXQogIE1vZGVsPC1hcy5mb3JtdWxhKHBhc3RlKGFkZHEocmVzcCkscGFzdGUobGFwcGx5KHByZSwgYWRkcSksY29sbGFwc2UgPSAnKycpLHNlcCA9ICd+JykpCiAgCiAgRml0TW9kZWw8LWxtKE1vZGVsLGRhdGE9VHJhaW5Nb2RlbCkKICAjRml0bW9kZWw8LWxtKGAwMUYwMDE3U195YH4uLGRhdGE9VHJhaW5Nb2RlbCkKICAjRml0bW9kZWw8LWxtKGFzLm1hdHJpeChUcmFpbkRUWzc6bnJvdyhUcmFpbkRUKSxpLHdpdGg9Rl0pfmFzLm1hdHJpeChUcmFpbkRUWzE6KG5yb3coVHJhaW5EVCktNiksMTpsZW5ndGgoVHJhaW5EVCksd2l0aD1GXSksZGF0YT1UcmFpbkRUKQogIHN0ZXB3aXNlPC1zdGVwKEZpdE1vZGVsLHNhY2xlPTAsZGlyZWN0aW9uID0gJ2JvdGgnKQogIHByZWRpY3RyZXNpZHVhbDwtUHJhY3RpY2VNb2RlbFtbMV1dLXByZWRpY3Qoc3RlcHdpc2UsUHJhY3RpY2VNb2RlbCkKICBSTVNFPC1ybXNlKHJlbW92ZV9vdXRsaWVycyhwcmVkaWN0KHN0ZXB3aXNlLFByYWN0aWNlTW9kZWwpKSxyZW1vdmVfb3V0bGllcnMoUHJhY3RpY2VNb2RlbFtbMV1dKSxuYS5ybSA9IFQpCiAgR2FudHJ5bmFtZTwtbmFtZXMoVHJhaW5EVFsxLGksd2l0aD1GQUxTRV0pCiAgd3JpdGUuY3N2KHN0ZXB3aXNlJGNvZWZmaWNpZW50cyxmaWxlID0gcGFzdGUwKEdhbnRyeW5hbWUsJ19Db2VmZmljaWVudHMnLCcuY3N2JykpCiAgd3JpdGUuY3N2KGNiaW5kKFRyYWluTW9kZWxbWzFdXSxzdGVwd2lzZSRmaXR0ZWQudmFsdWVzLHN0ZXB3aXNlJHJlc2lkdWFscyksZmlsZSA9IHBhc3RlMChHYW50cnluYW1lLCdfUmVzaWR1YWwnLCcuY3N2JykpCiAgd3JpdGUuY3N2KGNiaW5kKFByYWN0aWNlTW9kZWxbWzFdXSxwcmVkaWN0KHN0ZXB3aXNlLFByYWN0aWNlTW9kZWwpLHByZWRpY3RyZXNpZHVhbCksZmlsZSA9IHBhc3RlMChHYW50cnluYW1lLCdfUHJlZGljdCcsJy5jc3YnKSkKICBTdW1SZXNpZHVhbDwtc3VtbWFyeShyZW1vdmVfb3V0bGllcnMocHJlZGljdHJlc2lkdWFsKSkKICB1bmxpc3RSZXNpZHVhbDwtZGF0YS5mcmFtZShtYXRyaXgodW5saXN0KFN1bVJlc2lkdWFsKSxuY29sID0gNyxieXJvdyA9IFQpKQogIHN1bW1hcnlERjwtcmJpbmQoc3VtbWFyeURGLGNiaW5kKEdhbnRyeW5hbWUsY29yKFByYWN0aWNlTW9kZWxbWzFdXSxwcmVkaWN0KHN0ZXB3aXNlLFByYWN0aWNlTW9kZWwpKSxSTVNFLHVubGlzdFJlc2lkdWFsKSkKICAjI3N1bW1hcnlUZXN0PC1yYmluZChzdW1tYXJ5VGVzdCxjYmluZChkYXRhLmZyYW1lKHQoYXMubWF0cml4KHVubGlzdChzdGVwd2lzZSRjb2VmZmljaWVudHMpLG5jb2w9bGVuZ3RoKHN0ZXB3aXNlJGNvZWZmaWNpZW50cyksYnlyb3c9VCkpKSkpCiAgCiAgI1ByZWRpY3REYXRhPC1wcmVkaWN0KHN0ZXB3aXNlLFByYWN0aWNlRFQpCiAgaWYgKGkgPT0yKSh7CiAgICBjb2xuYW1lcyhzdW1tYXJ5REYpPC1jKCdHYW50cnlJRCcsJ0NvcicsJ1JNU0UnLCdSZXNpZHVhbE1pbicsJ1Jlc2lkdWFsUTEnLCdSZXNpZHVhbE1lZGlhbicsJ1Jlc2lkdWFsTWVhbicsJ1Jlc2lkdWFsUTMnLCdSZXNpZHVhbE1heCcsJ05hTnVtJykKICAgIHdyaXRlLmNzdihzdW1tYXJ5REYsZmlsZSA9ICdTdW1tYXJ5LmNzdicpCiAgfSkKICAKfSk=