# Install the scraping dependencies only when they are not already available,
# so that re-running the script does not reinstall them on every run.
required_pkgs <- c("rvest", "dplyr", "stringr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library( rvest)
library( dplyr)
library( stringr)
# Download the category listing page and keep the nodes carrying the
# ".kGpEgQ" class; the later extraction steps all read from this node set.
url <- "https://w...content-available-to-author-only...t.nu/kategori.php?k=s426874315&catId=353&sort=price_include_shipping&direction=asc"
listing_page <- read_html(url, encoding = "UTF-8")
price_watch_html <- html_nodes(listing_page, ".kGpEgQ")
# Product names: read from the "aria-label" attribute of each listing node.
item_name <- html_attr(price_watch_html, "aria-label")

# Product links: read each node's "href" and prepend the site address to it.
item_url <- html_attr(price_watch_html, "href")
item_full_url <- paste0(
  "http://p...content-available-to-author-only...t.nu",
  item_url
)
# Product prices (Q1).
# html_node() (singular) yields exactly one result per listing node and NA
# where a node has no ".bhjqZq" descendant, so item_price always has the same
# length and order as item_name / item_full_url. html_nodes() would instead
# return however many matches exist, which can break the later data.frame().
item_price <- price_watch_html %>%
  html_node(".bhjqZq") %>%
  html_text()

# Character count of each price string (quick sanity check of the scrape).
nchar(item_price)
# Fetch every product page and collect its spec strings, to be mapped onto
# the categories below (Q2).
# spec_list keeps one character vector per product (same order as
# item_full_url); `spec` still ends up holding the last product's specs.
# seq_along() makes the loop a no-op when no product links were found,
# whereas 1:length(x) would iterate over c(1, 0).
spec_list <- vector("list", length(item_full_url))
spec <- character(0)
for (i in seq_along(item_full_url)) {
  spec <- read_html(item_full_url[i], encoding = "UTF-8") %>%
    html_nodes(".iyOoKG") %>%
    html_text()
  spec_list[[i]] <- spec
  print(spec)
}
print(spec)

# Column labels the scraped specs are meant to be mapped to.
Category <- c("CPU", "Display", "RAM", "GPU", "Resolution", "Weight")
# Product spec table (Q3): select <table> nodes that are direct children of
# the element with id "main" and parse them with html_table().
# NOTE(review): the XPath is applied to the listing node set, not to a
# product page -- confirm this is the intended source of the table.
spec_table_nodes <- html_nodes(
  price_watch_html,
  xpath = '//*[@id="main"]/table'
)
spec_table <- html_table(spec_table_nodes)

# Inspect the parsed result and its class.
spec_table
class(spec_table)
# Target output is a 13 x 9 data frame; this step assembles only the three
# columns available so far (name, price, link), one row per product.
# stringsAsFactors = FALSE keeps the text columns as character vectors on
# R versions older than 4.0, where data.frame() converted them to factors.
df <- data.frame(
  Item = item_name,
  Price = item_price,
  URL = item_full_url,
  stringsAsFactors = FALSE
)
aW5zdGFsbC5wYWNrYWdlcygicnZlc3QiKQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpCmluc3RhbGwucGFja2FnZXMoInN0cmluZ3IiKQoKbGlicmFyeShydmVzdCkKbGlicmFyeShkcGx5cikKbGlicmFyeShzdHJpbmdyKQoKdXJsID0gImh0dHBzOi8vdy4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4udC5udS9rYXRlZ29yaS5waHA/az1zNDI2ODc0MzE1JmNhdElkPTM1MyZzb3J0PXByaWNlX2luY2x1ZGVfc2hpcHBpbmcmZGlyZWN0aW9uPWFzYyIKcHJpY2Vfd2F0Y2hfaHRtbCA8LSByZWFkX2h0bWwodXJsLGVuY29kaW5nPSJVVEYtOCIpICU+JQogIGh0bWxfbm9kZXMoIi5rR3BFZ1EiKQoKI+aKk+eUouWTgeWQjeeosQppdGVtX25hbWUgPC0gcHJpY2Vfd2F0Y2hfaHRtbCAlPiUgCiAgaHRtbF9hdHRyKCJhcmlhLWxhYmVsIikKCiPmipPnlKLlk4HpgKPntZAKaXRlbV91cmwgPC0gcHJpY2Vfd2F0Y2hfaHRtbCAlPiUgCiAgaHRtbF9hdHRyKCJocmVmIikKaXRlbV9mdWxsX3VybCA8LSBwYXN0ZTAoImh0dHA6Ly9wLi4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi50Lm51IixpdGVtX3VybCkKCiPmipPnlKLlk4Hlg7nmoLwtLT4gUTEKaXRlbV9wcmljZSA8LSBwcmljZV93YXRjaF9odG1sICU+JSAKICBodG1sX25vZGVzKCIuYmhqcVpxIikgJT4lCiAgaHRtbF90ZXh0KCkKbmNoYXIoaXRlbV9wcmljZSkKCiPmipPmr4/lgIvnlKLlk4HnmoTopo/moLznhLblvozlsI3mh4nliLAgY2F0ZWdvcnktLT4gUTIKZm9yIChpIGluIDE6bGVuZ3RoKGl0ZW1fZnVsbF91cmwpKXsKICBzcGVjIDwtIHJlYWRfaHRtbChpdGVtX2Z1bGxfdXJsW2ldLGVuY29kaW5nID0gIlVURi04IikgJT4lCiAgICBodG1sX25vZGVzKCIuaXlPb0tHIikgJT4lCiAgICBodG1sX3RleHQoKQogIHByaW50KHNwZWMpCn0KcHJpbnQoc3BlYykKCkNhdGVnb3J5IDwtIGMoIkNQVSIsICJEaXNwbGF5IiwgIlJBTSIsICJHUFUiLCAiUmVzb2x1dGlvbiIsIldlaWdodCIpCgoj5oqT55Si5ZOB6KaP5qC8IFRhYmxlLS0+IFEzCnNwZWNfdGFibGUgPC0gcHJpY2Vfd2F0Y2hfaHRtbCAlPiUgCiAgaHRtbF9ub2Rlcyh4cGF0aCA9ICcvLypbQGlkPSJtYWluIl0vdGFibGUnKSAlPiUKICBodG1sX3RhYmxlKCkKc3BlY190YWJsZQpjbGFzcyhzcGVjX3RhYmxlKQoKCiPnlKLlh7rkuIDlgIsgMTN4OSDnmoQgZGF0YSBmcmFtZQpkZiA8LSBkYXRhLmZyYW1lKEl0ZW0gPSBpdGVtX25hbWUsIFByaWNlID0gaXRlbV9wcmljZSwgVVJMID0gaXRlbV9mdWxsX3VybCkK