library(rvest)
library(dplyr)
library(stringr)
library(xlsx)
library(xopen)
library(lattice)
library(ggplot2)
library(ggthemes)
library(plyr)
# Base listing URL; its query string requests sorting by price including
# shipping, in ascending direction.
first_page <- "https://w...content-available-to-author-only...t.nu/kategori.php?k=s427524853&catId=353&sort=price_include_shipping&direction=asc"

# Build the listing-page URLs to scrape (pages one to seven).
# The first page is the bare URL; every later page appends an `offset` query
# parameter that advances in steps of `page_size`.
n_pages <- 7L
page_size <- 24
stopifnot(n_pages >= 1L)

later_offsets <- page_size * seq_len(n_pages - 1L)
urls <- first_page
# Guarded because paste0() with a zero-length argument can still return one
# string, which would add a bogus "&offset=" URL when only one page is wanted.
if (length(later_offsets) > 0) {
  urls <- c(urls, paste0(first_page, "&offset=", later_offsets))
}
# Scrape every listing page and collect one row per product card.
# Each page is parsed into its own data frame and stored in a preallocated
# list; the pieces are bound together once after the loop instead of growing
# `dt0` with rbind() on every iteration.
page_tables <- vector("list", length(urls))
for (k in seq_along(urls)) {
  # Product card nodes on this listing page.
  price_watch <- read_html(urls[[k]]) %>%
    html_nodes(".kGpEgQ")
  if (length(price_watch) == 0) {
    # Nothing matched: skip the page rather than build a malformed row.
    warning("No product cards found on ", urls[[k]], call. = FALSE)
    next
  }
  ### 1. Product name
  Item <- price_watch %>% html_attr("aria-label")
  ### 2. Product brand
  Brand <- word(Item, 1) # word() extracts the first word of Item
  ### 3. Product link, joined into a full URL
  Full_URL <- price_watch %>%
    html_attr("href") %>%
    paste0("https://w...content-available-to-author-only...t.nu", .)
  ### 4. Product price: strip "kr" and non-breaking spaces, convert to numeric
  # html_node() (singular) returns exactly one match per card, NA when the
  # card has none, so Price always lines up with Item. html_nodes() would
  # flatten the matches and could yield a different length.
  Price <- price_watch %>%
    html_node(".bhjqZq") %>%
    html_text() %>%
    gsub("kr|\u00A0", "", .) %>%
    as.numeric()
  page_tables[[k]] <- data.frame(Item, Brand, Full_URL, Price)
}

# Drop skipped pages, then bind all pages in one call.
page_tables <- Filter(Negate(is.null), page_tables)
# NOTE(review): rev() keeps the original row order, in which each newly
# scraped page was placed ahead of the earlier ones (last page first).
# Confirm whether natural page order was intended.
dt0 <- if (length(page_tables) > 0) {
  do.call(rbind, rev(page_tables))
} else {
  data.frame()
}
# Fetch the specification values from each product's own page and append them
# to `dt0` as six extra columns.
spec_cols <- c("CPU", "Display", "RAM", "GPU", "Resolution", "Weight (Kg)")
spec_url <- as.character(dt0$Full_URL)
if (length(spec_url) == 0) {
  stop("No product URLs in dt0; nothing to fetch specifications for.",
       call. = FALSE)
}

# Preallocated character matrix, one row per product, filled in place rather
# than growing a data frame row by row.
spec_values <- matrix(
  NA_character_,
  nrow = length(spec_url),
  ncol = length(spec_cols)
)
for (j in seq_along(spec_url)) {
  values <- read_html(spec_url[j], encoding = "UTF-8") %>%
    html_nodes(".iyOoKG") %>%
    html_text() %>%
    gsub("\u00A0", "", .)
  if (length(values) != length(spec_cols)) {
    warning(
      "Expected ", length(spec_cols), " specification values but found ",
      length(values), " on ", spec_url[j],
      call. = FALSE
    )
  }
  # Indexing past the end yields NA, so a short result is padded with NA and
  # a long one is cut to the first six values instead of erroring or being
  # silently recycled.
  spec_values[j, ] <- values[seq_along(spec_cols)]
}

spec <- as.data.frame(spec_values, stringsAsFactors = FALSE)
names(spec) <- spec_cols
# Remove the "tum" text from the Display values.
spec$Display <- gsub("tum", "", spec$Display)
# Remove the "kg" text and convert the weight to numeric.
# NOTE(review): as.numeric() gives NA for values written with a decimal
# comma; confirm the number format the site uses.
spec$`Weight (Kg)` <- gsub("kg", "", spec$`Weight (Kg)`) %>% as.numeric()

dt0 <- cbind(dt0, spec)
# Same column order as the positional c(2, 1, 4, 5:10, 3), selected by name so
# it does not depend on column positions.
dt0 <- dt0[c("Brand", "Item", "Price", spec_cols, "Full_URL")]
dt0
bGlicmFyeShydmVzdCkKbGlicmFyeShkcGx5cikKbGlicmFyeShzdHJpbmdyKQpsaWJyYXJ5KHhsc3gpCmxpYnJhcnkoeG9wZW4pCmxpYnJhcnkobGF0dGljZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdndGhlbWVzKQpsaWJyYXJ5KHBseXIpCgpmaXJzdF9wYWdlIDwtICJodHRwczovL3cuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLnQubnUva2F0ZWdvcmkucGhwP2s9czQyNzUyNDg1MyZjYXRJZD0zNTMmc29ydD1wcmljZV9pbmNsdWRlX3NoaXBwaW5nJmRpcmVjdGlvbj1hc2MiCnVybHMgPC0gTlVMTAoKIyDnlKLnlJ/mrLLniKznmoTntrLpoIHpgKPntZAgKOesrOS4gOmggeWIsOesrOS4g+mggSkKZm9yIChpIGluIDA6Nil7CiAgaWYgKGk9PTApewogICAgdXJscyA8LSBmaXJzdF9wYWdlCiAgfSBlbHNlewogICAgdXJscyA8LSBjKHVybHMscGFzdGUwKGZpcnN0X3BhZ2UsIiZvZmZzZXQ9IiwyNCppKSkKICB9Cn0KCmR0MCA8LSBkYXRhLmZyYW1lKCkKZm9yICh1cmwgaW4gdXJscyl7CiAgcHJpY2Vfd2F0Y2ggPC0gcmVhZF9odG1sKHVybCkgJT4lCiAgICBodG1sX25vZGVzKCIua0dwRWdRIikKICAKICAjIyMgMS7nlKLlk4HlkI3nqLEKICBJdGVtIDwtIHByaWNlX3dhdGNoICU+JSBodG1sX2F0dHIoImFyaWEtbGFiZWwiKQogICMjIyAyLueUouWTgeWTgeeJjAogIEJyYW5kIDwtIHdvcmQoSXRlbSwxKSAj55Sod29yZCgp5oiq5Y+WIEl0ZW0g56ys5LiA57WE5a2X5LiyCiAgIyMjIDMu55Si5ZOB6YCj57WQLCDlkIjkvbXmiJDlrozmlbRVUkwKICBGdWxsX1VSTCA8LSBwcmljZV93YXRjaCAlPiUgaHRtbF9hdHRyKCJocmVmIikgJT4lIAogICAgcGFzdGUwKCJodHRwczovL3cuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLnQubnUiLCAuKQogICMjIyA0LueUouWTgeWDueagvO+8jOeEtuW+jOWOu+mZpOepuuagvOiIh2ty5a2X5Liy77yM6L2J5oiQ5pW45YC8CiAgUHJpY2UgPC0gcHJpY2Vfd2F0Y2ggJT4lIAogICAgaHRtbF9ub2RlcygiLmJoanFacSIpICU+JSAKICAgIGh0bWxfdGV4dCgpICU+JSAKICAgIGdzdWIoImtyfFx1MDBBMCIsICIiLCAuKSAlPiUgCiAgICBhcy5udW1lcmljCiAgCiAgZHQxIDwtIGRhdGEuZnJhbWUoSXRlbSwgQnJhbmQsIEZ1bGxfVVJMLCBQcmljZSkKICBkdDAgPC0gcmJpbmQoZHQxLCBkdDApCn0KCiPmipPmr4/lgIvnlKLlk4HpgKPntZDoo6HnmoTopo/moLwKc3BlYyA8LSBkYXRhLmZyYW1lKCkKc3BlY191cmwgPC0gYXMuY2hhcmFjdGVyKGR0MCRGdWxsX1VSTCkKCmZvciAoaiBpbiAxOmxlbmd0aChkdDAkRnVsbF9VUkwpKSB7CiAgc3BlY1tqLDE6Nl0gPC0gCiAgICByZWFkX2h0bWwoc3BlY191cmxbal0sIGVuY29kaW5nID0gIlVURi04IikgJT4lCiAgICBodG1sX25vZGVzKCIuaXlPb0tHIikgJT4lCiAgICBodG1sX3RleHQoKSAlPiUKICAgIGdzdWIoIlx1MDBBMCIsIiIsIC4pCn0KCm5hbWVzKHNwZWMpWzE6Nl0gPC1jKCJDUFUiLCAiRGlzcGxheSIsICJS
QU0iLCAiR1BVIiwgIlJlc29sdXRpb24iLCAiV2VpZ2h0IChLZykiKQpzcGVjJERpc3BsYXkgPC0gZ3N1YigidHVtIiwiIiwgc3BlYyREaXNwbGF5KQpzcGVjJGBXZWlnaHQgKEtnKWA8LSBnc3ViKCJrZyIsIiIsIHNwZWMkYFdlaWdodCAoS2cpYCkgJT4lIGFzLm51bWVyaWMoKQoKZHQwIDwtIGNiaW5kKGR0MCxzcGVjKQpkdDAgPC0gZHQwW2MoMiwxLDQsNToxMCwzKV0KZHQw