Ideone.com

fork download

copy

library(tm)
library(tmcn)
library(Rwordseg)
 
# Build a term-document matrix from the UTF-8 text files in "docs":
# clean each document, segment it into words, drop stop words, then count
# the remaining terms per document.
d.corpus <- Corpus(DirSource("docs", encoding = "UTF-8"))
 
# Remove punctuation and numbers.
d.corpus <- tm_map(d.corpus, removePunctuation)
d.corpus <- tm_map(d.corpus, removeNumbers)
# Remove upper- and lower-case ASCII letters and digits.
# A custom function is wrapped in content_transformer() so that tm_map
# rewrites only the document content and keeps each element a text document
# rather than replacing it with a bare character vector.
d.corpus <- tm_map(d.corpus, content_transformer(function(word) {
  gsub("[A-Za-z0-9]", "", word)
}))
 
# Segment every document into words with Rwordseg's segmentCN.
# NOTE(review): nature = TRUE presumably attaches part-of-speech tags to the
# segmented words -- confirm against the installed Rwordseg version.
d.corpus <- tm_map(d.corpus, segmentCN, nature = TRUE)
 
# Rebuild a corpus from the segmented result so the following tm steps
# receive a regular corpus again.
d.corpus <- Corpus(VectorSource(d.corpus))
 
# Stop words: the list returned by stopwordsCN() plus terms specific to this
# data set.
myStopWords <- c(stopwordsCN(), "編輯", "時間", "標題", "發信", "實業", "作者", "！")
d.corpus <- tm_map(d.corpus, removeWords, myStopWords)
 
# Keep only terms that are at least two characters long.
tdm <- TermDocumentMatrix(d.corpus, control = list(wordLengths = c(2, Inf)))
 
# Print a summary and a sample of the resulting matrix.
inspect(tdm)

Runtime error #stdin #stdout #stderr 0.82s 23176KB

stdin

copy

Standard input is empty

stdout

copy

Standard output is empty

stderr

copy

Error in library(tmcn) : there is no package called 'tmcn'
Execution halted

https://ideone.com/icNr73

language:

R (R 3.5.2)

created:

visibility:

public

Share or Embed source code

Discover > Sphere Engine API

The brand new service which powers Ideone!

Discover > IDE Widget

Widget for compiling and running the source code in a web browser!

Discover > Sphere Engine API

Discover > IDE Widget

Choose your language