# Load the segmented text files.
# tm provides DirSource(), VCorpus(), tm_map() and TermDocumentMatrix();
# attach it here so the script also runs in a fresh R session.
library(tm)

getwd()
segtxt.dir <- "D:/TestJeibaR/segtxt"
# Stop early with a clear message instead of failing later inside DirSource().
if (!dir.exists(segtxt.dir)) {
  stop("Input directory not found: ", segtxt.dir, call. = FALSE)
}
dir(segtxt.dir)
# The files are declared to be UTF-8 encoded.
path.dir <- DirSource(segtxt.dir, encoding = "UTF-8")


# Build a volatile (in-memory) corpus from the directory source.
insure <- VCorpus(path.dir)

# Text-cleaning helpers.

# Delete every character matched by the [[:punct:]] class from x.
remove_spe <- function(x) {
  gsub(pattern = "[[:punct:]]", replacement = "", x = x)
}

# Delete every character matched by [0-9a-zA-Z] (digits and Latin letters)
# from x.
remove_en <- function(x) {
  gsub(pattern = "[0-9a-zA-Z]", replacement = "", x = x)
}

# Strip Latin letters/digits, then punctuation, from every document.
# The former strsplit() step is intentionally gone: strsplit() returns a list,
# so the document content stopped being a character vector and the
# gsub()-based helpers only appeared to work by coercing that list to
# deparsed text of the form c("...", "..."). TermDocumentMatrix() tokenises
# the text itself, so no manual splitting is needed here.
str.insure <- tm_map(insure, content_transformer(remove_en))
str.insure <- tm_map(str.insure, content_transformer(remove_spe))

# Build the term-document matrix from the cleaned corpus.
# NOTE(review): tm's default wordLengths = c(3, Inf) discards terms shorter
# than three characters, which would exclude one- and two-character Chinese
# words. If those should be counted, pass
# control = list(wordLengths = c(1, Inf)) -- confirm the intended behaviour.
tdm.str.insure <- TermDocumentMatrix(str.insure)

# Convert the sparse matrix into a dense matrix holding every count.
# inspect() is a display helper (it prints, and in current tm versions only
# shows a small sample), so it must not be used to extract the data.
a <- as.matrix(tdm.str.insure)

# Look at the matrix and write it out as CSV.
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(a)
}
# col.names = NA writes an empty header cell above the row names so the
# header row lines up with the data columns.
write.table(a, file = "now.CSV", sep = ",", col.names = NA)