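# Q: I want to extract one field from a single block (node) of the page,
#    but the query returns that field for every matching element in the
#    whole page instead of only the one inside the selected block.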

# crawl library ----
library(bitops)
library(XML)
library(RCurl)

# CA certificate bundle shipped with RCurl (only needed on Windows) -----
# The path components are passed positionally; the original spelled the file
# name as `cainfo = "cacert.pem"`, which only worked because system.file()
# joins everything in `...` into a path regardless of argument names.
signatures <- system.file("CurlSSL", "cacert.pem", package = "RCurl")

# Fetch the listing page and parse it into an internal HTML document.
# Separate names are used for the URL, the raw HTML text and the parsed
# document; `home` ends up holding the parsed document.
home_url <- "http://w...content-available-to-author-only...e.com/aussteller/messen/index.php?OK=1&sortierid=0&maxPerPage=20&i_cockpitkeyfindwo=2&i_cockpitkeyfindart=1&currPage=1"
home_html <- getURL(home_url, cainfo = signatures)
# asText = TRUE: the argument is the HTML content itself, not a file name/URL.
home <- htmlParse(home_html, asText = TRUE)

# One <div class="shm"> node per listing entry (20 entries on this page).
block <- getNodeSet(home, "//div[@class='shm']")

# length(block)  # 20 entries

# Fail with a clear message instead of a "subscript out of bounds" error
# when the page layout changed or the download returned something else.
if (length(block) == 0L) {
  stop("No <div class='shm'> nodes found in the downloaded page.",
       call. = FALSE)
}

doc <- block[[1]]  # take the first entry

# doc
# <div class="shm">      
#   <div class="listdates">   
#   <div class="date">08Jan-17Jan2016</div>
#   </div>   
#   <div class="search_result_list_box">
#   <div class="city">London, United Kingdom</div>
#   <div class="firma"><a href="show.php?id=352&amp;timer=m1452657261&amp;tmid=&amp;currPage=1&amp;maxPerPage=20&amp;params=timer%3Dm1452657261%26amp%3Btimer%3Dm1452657261%26amp%3Bi_cockpitkeyfindwo%3D2%26amp%3Bi_cockpitkeyfindart%3D1%26amp%3Bsortierid%3D0%26amp%3Btimer%3Dm1452657261%26amp%3BmaxPerPage%3D20%26amp%3BshowPrintlist%3D0%26amp%3BmaxPerPage%3D20">London Boat Show</a></div>
#   </div>
#   <div class="search_result_box_right">
#   <div class="branchen"><strong>Business sectors:</strong> Boats</div>
#   </div>
#   <div class="fixfloat"></div>
# </div> 

# Extract the date that belongs to block[[1]] only.
#
# An XPath expression starting with "//" is absolute: it selects matching
# nodes from the document root even when a context node is supplied. That is
# why "//div[@class='date']" returned every date in the page. The leading "."
# makes the path relative to the context node `doc`, so only its descendants
# are searched.
date <- xpathSApply(doc, ".//div[@class='date']", xmlValue)

# Expected result for the block shown above:
# [1] "08Jan-17Jan2016"

# For reference, the absolute path "//div[@class='date']" returned all dates
# found in the page:
# [1] "08Jan-17Jan2016" "08Jan-17Jan2016" "09Jan-17Jan2016" "07Jan-15Jan2017" "06Jan-14Jan2018"
# [6] "09Jan-17Jan2016" "09Jan-17Jan2016" "09Jan-17Jan2016" "09Jan-17Jan2016" "10Jan-13Jan2016"
# [11] "10Jan-13Jan2016" "10Jan-13Jan2016" "10Jan-13Jan2016" "11Jan-13Jan2016" "11Jan-13Jan2016"
# [16] "11Jan-13Jan2016" "11Jan-14Jan2016" "11Jan-14Jan2016" "11Jan-14Jan2016" "11Jan-14Jan2016"
# [21] "11Jan-14Jan2016" "11Jan-24Jan2016" "09Jan-22Jan2017"