import sys
import csv
import os
import time
import urllib
import datetime
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
serviceurl = 'https://s...content-available-to-author-only...t.com/trending/Hindi'
files = "dataset_link_1.txt"
if not os.path.exists(files):
file(files, 'w').close()
enter = open(files,'w');
url = serviceurl
driver = webdriver.Firefox(executable_path='D:\CHIT CHAT\Scrapper\geckodriver');
driver.maximize_window() #For maximizing window
driver.get(url);
driver.implicitly_wait(3) #gives an implicit wait for 10 seconds
while driver.execute_script("return document.readyState") != 'complete':
pass;
for i in range(1,20):
SCROLL_PAUSE_TIME = 0.5
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(SCROLL_PAUSE_TIME)
# Calculate new scroll height and compare with last scroll height
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/a/div[3]/div[1]"%(i)).text.encode('utf-8')
print(var) #No of watches
enter.write("Total No of views:\n%s\n" %(var));
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/a/div[1]/div[1]/span"%(i)).text.encode('utf-8')
print(var) #Title
enter.write("Title:\n%s\n" %(var));
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[1]/a/div[2]/div/div[2]"%(i)).text.encode('utf-8')
print(var) #owner bio
enter.write("Writer's Bio:\n%s\n" %(var));
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[1]/a/div[2]/div/div[1]/strong"%(i)).text.encode('utf-8')
print(var) #owner's bio
enter.write("Writer's Name:\n%s\n" %(var));
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[2]/div/button[2]/div/span"%(i)).text.encode('utf-8')
print(var) #comments
enter.write("Total Comments:\n%s\n" %(var));
var = driver.find_element_by_xpath("/html/body/div/div[1]/div/main/div[1]/div[2]/div/section/div[%s]/div/div/div[2]/div/button[1]/div/span"%(i)).text.encode('utf-8')
print(var) #whatsapp
enter.write("Whatsapp Share:\n%s\n" %(var));
print()
# driver.save_screenshot("captcha_%s.png"%(i))
driver.back()
driver.quit()
enter.close()