# fork download  (page controls captured together with the paste; not part of the script)
  # Packages used by the script. plyr is attached before dplyr so that plyr's
  # same-named functions do not mask dplyr's verbs.
  library(plyr)
  library(dplyr)
  library(rvest)
  library(stringr)
  library(xlsx)
  library(xopen)
  library(lattice)
  library(ggplot2)
  library(ggthemes)

  # First listing page of the category; the query string asks for sorting by
  # price including shipping, ascending.
  # NOTE(review): the host part of this URL ("w...content-available-to-author-only...")
  # looks like it was redacted by the paste site -- restore the real host before
  # running the script.
  first_page <- "https://w...content-available-to-author-only...t.nu/kategori.php?k=s427524853&catId=353&sort=price_include_shipping&direction=asc"

  # Build the listing URLs to scrape (page 1 to page 7). The first page takes no
  # offset; each following page appends "&offset=" in steps of 24
  # (24, 48, ..., 144) -- presumably the number of products per page.
  offsets <- 24 * seq_len(6)
  urls <- c(first_page, paste0(first_page, "&offset=", offsets))

  # Scrape every listing page in `urls` and collect one row per product:
  #   Item     - product name (the node's "aria-label" attribute)
  #   Brand    - first word of Item
  #   Full_URL - the node's "href" prefixed with the site root
  #   Price    - price text with "kr" and non-breaking spaces removed, as numeric
  # Each page's table is stored in a list and bound once at the end instead of
  # growing a data frame inside the loop.
  page_tables <- vector("list", length(urls))
  for (p in seq_along(urls)) {
    price_watch <- read_html(urls[p]) %>%
      html_nodes(".kGpEgQ")

    # A page without product nodes is skipped: paste0() would turn the empty
    # href vector into a single bogus URL and data.frame() would then fail on
    # mismatched column lengths.
    if (length(price_watch) == 0L) {
      next
    }

    ### 1. Product name
    Item <- price_watch %>% html_attr("aria-label")
    ### 2. Product brand: word() extracts the first word of Item
    Brand <- word(Item, 1)
    ### 3. Product link, joined with the site root into a full URL
    # NOTE(review): the host below looks redacted by the paste site, same as in
    # `first_page`; restore it before running.
    Full_URL <- price_watch %>%
      html_attr("href") %>%
      paste0("https://w...content-available-to-author-only...t.nu", .)
    ### 4. Product price: strip "kr" and non-breaking spaces, convert to numeric.
    # html_node() (singular) looks up the price inside each product node, so the
    # result always has one entry per product (NA when a product has no price
    # node) and stays aligned with Item / Brand / Full_URL.
    Price <- price_watch %>%
      html_node(".bhjqZq") %>%
      html_text() %>%
      gsub("kr|\u00A0", "", .) %>%
      as.numeric()

    page_tables[[p]] <- data.frame(Item, Brand, Full_URL, Price,
                                   stringsAsFactors = FALSE)
  }

  # Typed zero-row table so that `dt0` keeps its four columns even when no page
  # yielded any product.
  no_products <- data.frame(Item = character(0), Brand = character(0),
                            Full_URL = character(0), Price = numeric(0),
                            stringsAsFactors = FALSE)
  # rev() keeps the original row order: the script prepended every new page, so
  # the last page scraped comes first in `dt0`.
  dt0 <- do.call(rbind, c(rev(page_tables), list(no_products)))

  # Fetch the specification fields from each product's own page.
  # `spec` ends up with one row per row of `dt0` and six character columns
  # (V1..V6), filled in the order the ".iyOoKG" nodes appear on the page.
  spec_url <- as.character(dt0$Full_URL)
  n_fields <- 6L

  # Preallocate the result instead of growing a data frame row by row.
  spec_mat <- matrix(NA_character_, nrow = length(spec_url), ncol = n_fields)

  # seq_along() iterates zero times when there are no products (1:0 would not).
  for (j in seq_along(spec_url)) {
    fields <- read_html(spec_url[j], encoding = "UTF-8") %>%
      html_nodes(".iyOoKG") %>%
      html_text() %>%
      gsub("\u00A0", "", .)

    # A page that does not expose exactly six fields is padded with NA (or cut
    # to the first six) and reported, rather than aborting the whole run or
    # being silently recycled across the row.
    if (length(fields) != n_fields) {
      warning("Expected ", n_fields, " spec fields but found ", length(fields),
              " at ", spec_url[j], call. = FALSE)
    }
    n_keep <- min(length(fields), n_fields)
    spec_mat[j, seq_len(n_keep)] <- fields[seq_len(n_keep)]
  }

  spec <- as.data.frame(spec_mat, stringsAsFactors = FALSE)

  # Name the six spec columns, strip the unit suffixes and merge the specs into
  # the product table.
  names(spec)[1:6] <- c("CPU", "Display", "RAM", "GPU", "Resolution", "Weight (Kg)")
  # Remove the "tum" suffix (presumably the Swedish word for inch); Display
  # stays a character column.
  spec$Display <- gsub("tum", "", spec$Display)
  # Remove the "kg" suffix and convert the weight to numeric.
  spec$`Weight (Kg)` <- as.numeric(gsub("kg", "", spec$`Weight (Kg)`))

  dt0 <- cbind(dt0, spec)
  # Final column order: brand first, product link last. Columns are selected by
  # name so the order does not depend on how the two tables were laid out.
  dt0 <- dt0[c("Brand", "Item", "Price",
               "CPU", "Display", "RAM", "GPU", "Resolution", "Weight (Kg)",
               "Full_URL")]
  dt0
# --- Run log captured together with the paste (not part of the script) ---
# Success #stdin #stdout #stderr 0.28s 38968KB
# stdin:  Standard input is empty
# stdout: Standard output is empty
# stderr:
#   Error in library(rvest) : there is no package called ‘rvest’
#   Execution halted