from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import sys
import csv
import requests
import time, re
from bs4 import BeautifulSoup
# Shared Firefox WebDriver session. Note that the browser is launched as soon
# as this module is imported, not when main() is called.
browser = webdriver.Firefox()
# Root URL to which main() appends a '?page=N' query string.
# NOTE(review): the host looks elided ('...') -- confirm the real site URL.
BASE_URL = 'http://a...content-available-to-author-only...s.com/'
# Not referenced by any code visible in this file -- presumably a leftover
# from a Selenium IDE export; TODO confirm before removing.
accept_next_alert = True
def getting_titles(url):
    """Download *url* and return the recipe titles found on that page.

    The page is fetched with ``requests`` (not through the Selenium browser)
    and parsed with BeautifulSoup. Every ``<h3>`` whose class attribute is
    ``grid-col__h3 grid-col__h3--recipe-grid`` is treated as a recipe title.

    Args:
        url: Absolute URL of the listing page to scrape.

    Returns:
        A list with the stripped text of each matching heading, in document
        order. The list is empty when the page contains no such heading.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    headings = soup.find_all(
        'h3', {'class': 'grid-col__h3 grid-col__h3--recipe-grid'}
    )
    # get_text() is used instead of .string because .string is None whenever
    # the heading wraps more than one child node (e.g. an <a> plus a <span>).
    return [heading.get_text(strip=True) for heading in headings]
def save(recipes, path, fieldname):
    """Write recipe titles to a single-column CSV file.

    Args:
        recipes: Iterable of title values; each one becomes a row. ``None``
            or an empty iterable produces a file with only the header row.
            Pass a list, not a bare string (a string would be split into
            one row per character).
        path: Destination file path. An existing file is overwritten.
        fieldname: Column header. Falls back to ``'Titles'`` when empty or
            ``None``.
    """
    column = fieldname or 'Titles'
    # newline='' lets the csv module control line endings (avoids blank rows
    # on Windows); utf-8 keeps non-ASCII titles intact on every platform.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=[column])
        writer.writeheader()
        writer.writerows({column: title} for title in (recipes or ()))
def main(url=BASE_URL):
    """Visit listing pages 1 and 2 of *url* and scrape each for titles.

    For every page the shared Selenium ``browser`` loads the page and scrolls
    to the bottom, then ``getting_titles`` is called on the same page URL.

    Args:
        url: Base listing URL; ``'?page=N'`` is appended for each page.
    """
    driver = browser
    # range(1, 3) yields pages 1 and 2. The loop variable is deliberately not
    # incremented inside the body: doing so skipped page 1 and fetched 2 and 3.
    for page in range(1, 3):
        page_url = url + '?page=' + str(page)
        # Load the same URL that is scraped below (honours the *url* argument
        # rather than always opening BASE_URL).
        driver.get(page_url)
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);"
        )
        # Fixed pause after scrolling -- presumably to let lazily loaded
        # content appear; TODO confirm it is still needed.
        time.sleep(10)
        # NOTE(review): getting_titles() performs its own HTTP request and
        # does not read the browser's page; its result is discarded here.
        getting_titles(page_url)
if __name__ == "__main__":
    # When run as a script, always shut the Firefox session down afterwards,
    # even if scraping fails, so no browser/driver process is left behind.
    try:
        main()
    finally:
        browser.quit()