fork download
  1. import sys
  2. import csv
  3. import os
  4. import time
  5. import urllib
  6. import datetime
  7. from selenium import webdriver
  8. from selenium.webdriver.common.keys import Keys
  9. from selenium.webdriver.common.action_chains import ActionChains
  10. serviceurl = 'https://s...content-available-to-author-only...t.com/trending/Hindi'
  11.  
  12. files = "dataset_link_1.txt"
  13. if not os.path.exists(files):
  14. file(files, 'w').close()
  15. enter = open(files,'w');
  16.  
  17. url = serviceurl
  18. driver = webdriver.Firefox(executable_path='D:\CHIT CHAT\Scrapper\geckodriver');
  19. driver.maximize_window() #For maximizing window
  20. driver.get(url);
  21. driver.implicitly_wait(3) #gives an implicit wait for 10 seconds
  22. while driver.execute_script("return document.readyState") != 'complete':
  23. pass;
  24.  
  25. for i in range(1,20):
  26.  
  27. SCROLL_PAUSE_TIME = 0.5
  28.  
  29. # Get scroll height
  30. last_height = driver.execute_script("return document.body.scrollHeight")
  31.  
  32. while True:
  33. # Scroll down to bottom
  34. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  35. # Wait to load page
  36. time.sleep(SCROLL_PAUSE_TIME)
  37. # Calculate new scroll height and compare with last scroll height
  38. new_height = driver.execute_script("return document.body.scrollHeight")
  39. if new_height == last_height:
  40. break
  41. last_height = new_height
  42.  
  43.  
  44. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/a/div[3]/div[1]"%(i)).text.encode('utf-8')
  45. print(var) #No of watches
  46. enter.write("Total No of views:\n%s\n" %(var));
  47.  
  48. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/a/div[1]/div[1]/span"%(i)).text.encode('utf-8')
  49. print(var) #Title
  50. enter.write("Title:\n%s\n" %(var));
  51.  
  52. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[1]/a/div[2]/div/div[2]"%(i)).text.encode('utf-8')
  53. print(var) #owner bio
  54. enter.write("Writer's Bio:\n%s\n" %(var));
  55.  
  56. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[1]/a/div[2]/div/div[1]/strong"%(i)).text.encode('utf-8')
  57. print(var) #owner's bio
  58. enter.write("Writer's Name:\n%s\n" %(var));
  59.  
  60. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[2]/div/button[2]/div/span"%(i)).text.encode('utf-8')
  61. print(var) #comments
  62. enter.write("Total Comments:\n%s\n" %(var));
  63.  
  64. var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[2]/div/button[1]/div/span"%(i)).text.encode('utf-8')
  65. print(var) #whatsapp
  66. enter.write("Whatsapp Share:\n%s\n" %(var));
  67.  
  68. print()
  69. # driver.save_screenshot("captcha_%s.png"%(i))
  70.  
  71. driver.back()
  72.  
  73. driver.quit()
  74. enter.close()
  75.  
Runtime error #stdin #stdout #stderr 0.01s 30240KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 7, in <module>
ImportError: No module named 'selenium'