fork download
  # Build a term-document matrix from a directory of pre-segmented text files
  # and export it to "now.CSV" in the current working directory.

  # tm provides DirSource(), VCorpus(), tm_map(), content_transformer() and
  # TermDocumentMatrix(); without it the script stops at DirSource().
  library(tm)

  # Load files
  getwd()
  segtxt.dir <- "D:/TestJeibaR/segtxt"
  dir(segtxt.dir)

  # Fail early with a clear message when the input directory is missing.
  if (!dir.exists(segtxt.dir)) {
    stop("Input directory not found: ", segtxt.dir, call. = FALSE)
  }
  path.dir <- DirSource(segtxt.dir, encoding = "UTF-8")

  # Convert to corpus format
  insure <- VCorpus(path.dir)

  # Clean the data
  # Strip punctuation characters.
  remove_spe <- function(x) gsub("[[:punct:]]", "", x)
  # Strip ASCII digits and Latin letters.
  remove_en <- function(x) gsub("[0-9a-zA-Z]", "", x)

  # The documents are kept as plain character lines: wrapping strsplit() in
  # content_transformer() would turn each document's content into a list,
  # which the gsub()-based cleaners would then have to coerce back to text.
  # The space-separated tokens are left for TermDocumentMatrix() to tokenize.
  str.insure <- tm_map(insure, content_transformer(remove_en))
  str.insure <- tm_map(str.insure, content_transformer(remove_spe))

  # Term-document matrix
  # NOTE(review): tm's default term-length filter may drop very short terms;
  # if one- or two-character words are expected in the output, confirm whether
  # control = list(wordLengths = c(1, Inf)) is needed here.
  tdm.str.insure <- TermDocumentMatrix(str.insure)

  # as.matrix() yields the complete terms x documents matrix; inspect() is a
  # display helper and is not a reliable way to obtain the full matrix.
  a <- as.matrix(tdm.str.insure)

  # View `a` and write it out as CSV
  # View() needs an interactive session, so skip it in batch runs.
  if (interactive()) {
    View(a)
  }
  # write.csv() emits an empty header cell above the row names, so the header
  # row lines up with the data columns.
  write.csv(a, file = "now.CSV")
Success #stdin #stdout #stderr 0.22s 79168KB
stdin
Standard input is empty
stdout
[1] "/tmp/hY4Zyl"
character(0)
stderr
Error: could not find function "DirSource"
Execution halted