# Sys.setlocale(locale = "cht") # for windows OS
library(rvest)
library(magrittr)
# Scrape the first table on the page into a character matrix, mostly via XPath.
# The idea: take the first div[@class='rt-tbody'] on the page, collect every
# div[@class='rt-td'] cell reached through two intermediate div levels below
# it, and lay the cell texts out row by row in a 6-column matrix.
# Converting to a data.frame, adding colnames, or parsing strings to numbers
# is left to the reader as needed.
read_html("http://p...content-available-to-author-only...w.org/unit/%E8%A1%8C%E6%94%BF%E9%99%A2%E7%92%B0%E5%A2%83%E4%BF%9D%E8%AD%B7%E7%BD%B2") %>%
  html_nodes(xpath = "//div[@class='rt-tbody']") %>%
  .[[1]] %>% # keep only the first matching table body
  html_nodes(xpath = "div/div/div[@class='rt-td']") %>%
  html_text() %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned,
  # which would silently switch the fill order to column-wise.
  matrix(ncol = 6, byrow = TRUE)
# Per the original author's note, the result is a 100 x 6 matrix.
IyBTeXMuc2V0bG9jYWxlKGxvY2FsZSA9ICJjaHQiKSAjIGZvciB3aW5kb3dzIE9TCmxpYnJhcnkocnZlc3QpCmxpYnJhcnkobWFncml0dHIpCgojIOS7peS4i+aIkeS4u+imgemdoCB4cGF0aCDljrvlr6vvvIzkvYbmh4noqbLkuI3pm6PnnIvmh4LjgIIKIyDph43pu57lsLHmmK/miorpoIHpnaLkuK3nrKzkuIDlgIsgZGl2W0BjbGFzcz0ncnQtdGJvZHknXSDkuYvlhafnmoQgZGl2W0BjbGFzcz0ncnQtdGQnXSDpg73mipPlh7rkvobvvIwKIyDlho3pgJDliJfmjpLlnKggbmNvbCA9IDYg55qEIG1hdHJpeCDkuYvkuK3lsLHlrozmiJDkuobjgIIKIyDkuYvlvozopoHkuI3opoHovYkgZGF0YS5mcmFtZSDmiJbliqDkuIogY29sbmFtZXMg5oiW5a2X5Liy6L2J5pW45a2X5bCx5YWI5LiN6Kqq5LqG77yM5Y+v5Lul5oyJ6ZyA6KaB6Ieq6KGM5pON5L2c44CCCnJlYWRfaHRtbCgiaHR0cDovL3AuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLncub3JnL3VuaXQvJUU4JUExJThDJUU2JTk0JUJGJUU5JTk5JUEyJUU3JTkyJUIwJUU1JUEyJTgzJUU0JUJGJTlEJUU4JUFEJUI3JUU3JUJEJUIyIikgJT4lCiAgaHRtbF9ub2Rlcyh4cGF0aCA9ICIvL2RpdltAY2xhc3M9J3J0LXRib2R5J10iKSAlPiUKICAuW1sxXV0gJT4lCiAgaHRtbF9ub2Rlcyh4cGF0aCA9ICJkaXYvZGl2L2RpdltAY2xhc3M9J3J0LXRkJ10iKSAlPiUKICBodG1sX3RleHQoKSAlPiUKICBtYXRyaXgobmNvbCA9IDYsIGJ5cm93ID0gVCkKIyDntZDmnpzngrogMTAwICogNiDnmoQgbWF0cml4