# Build a term-document matrix from a directory of text files, strip
# ASCII letters/digits and punctuation, and export the matrix as CSV.
# All corpus functions used below come from the tm package.
library(tm)

# Load the files
getwd()
segtxt.dir <- 'D:/TestJeibaR/segtxt'
dir(segtxt.dir)
path.dir <- DirSource(segtxt.dir, encoding = 'UTF-8')

# Turn the directory source into a corpus
insure <- VCorpus(path.dir)

# Cleaning helpers: each takes a character vector and returns it with the
# matched characters removed.
remove_spe <- function(x) gsub("[[:punct:]]", "", x)  # drop punctuation
remove_en <- function(x) gsub("[0-9a-zA-Z]", "", x)   # drop ASCII letters/digits

# No explicit strsplit() step: strsplit() returns a list, which is not the
# character vector a text document holds, and the space-separated tokens
# are split on whitespace when the term-document matrix is built anyway.
str.insure <- tm_map(insure, content_transformer(remove_en))
str.insure <- tm_map(str.insure, content_transformer(remove_spe))

# Term-document matrix
# NOTE(review): TermDocumentMatrix() is called with its default control
# settings; if short (1-2 character) terms are missing from the result,
# check the `wordLengths` control option -- confirm the intended minimum.
tdm.str.insure <- TermDocumentMatrix(str.insure)

# Convert to a dense matrix with as.matrix(). inspect() is a display
# function and may return only a sample of the terms/documents, which
# would silently truncate the exported CSV.
a <- as.matrix(tdm.str.insure)

# Look at `a` (only possible in an interactive session) and write it to CSV
if (interactive()) {
  View(a)
}
write.table(a, file = "now.CSV", sep = ",")
I+i8ieWFpeaqlOahiApnZXR3ZCgpCnNlZ3R4dC5kaXIgPC0gJ0Q6L1Rlc3RKZWliYVIvc2VndHh0JwpkaXIoc2VndHh0LmRpcikKcGF0aC5kaXIgPC0gRGlyU291cmNlKHNlZ3R4dC5kaXIsIGVuY29kaW5nID0gJ1VURi04JykKCgoj5YGa5oiQY29ycHVz5qC85byPCmluc3VyZSA8LSBWQ29ycHVzKHBhdGguZGlyKQoKI+a4heeQhuizh+aWmQpyZW1vdmVfc3BlIDwtIGZ1bmN0aW9uKHgpIGdzdWIoIltbOnB1bmN0Ol1dIiwiIix4KQpyZW1vdmVfZW4gPC0gZnVuY3Rpb24oeCkgZ3N1YigiWzAtOWEtekEtWl0iLCIiLHgpCgpzdHIuaW5zdXJlIDwtIHRtX21hcChpbnN1cmUsIGNvbnRlbnRfdHJhbnNmb3JtZXIoc3Ryc3BsaXQpLCBzcGxpdD0iICIpCnN0ci5pbnN1cmUgPC0gdG1fbWFwKHN0ci5pbnN1cmUsIGNvbnRlbnRfdHJhbnNmb3JtZXIocmVtb3ZlX2VuKSkKc3RyLmluc3VyZSA8LSB0bV9tYXAoc3RyLmluc3VyZSwgY29udGVudF90cmFuc2Zvcm1lcihyZW1vdmVfc3BlKSkKCiN0ZG3moLzlvI8KdGRtLnN0ci5pbnN1cmUgPC0gVGVybURvY3VtZW50TWF0cml4KHN0ci5pbnN1cmUpCgphIDwtIGRhdGEuZnJhbWUoKQphIDwtIGluc3BlY3QodGRtLnN0ci5pbnN1cmUpCgoj55yLYeS7peWPiuWvq+WHuuaIkGNzdgpWaWV3KGEpCndyaXRlLnRhYmxlKGEsIGZpbGUgPSAibm93LkNTViIsIHNlcCA9ICIsIik=