from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup


def get_urls(url):
    """Collect the topic links on one listing page and the URL of the next page."""
    topic_urls = []
    html = urlopen(url).read()
    soup = BeautifulSoup(html, 'html.parser')
    # news feed: every topic link on the current page
    links = soup.find_all('a', class_='newsFeed_item_link')
    for link in links:
        topic_urls.append(urljoin(url, link['href']))
    # next page: the pagination item pointing to the following listing page
    next_page = soup.find_all('li', class_='pagination_item pagination_item-next')
    if not next_page:
        next_url = None
    else:
        next_url = urljoin(url, next_page[0].a['href'])
    return {'topic': topic_urls, 'next': next_url}
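
# Note on urljoin: the scraped href values may be relative, so they are resolved
# against the page URL. Hypothetical illustration (example.com is a placeholder,
# not the site scraped above):
#
#     urljoin('https://news.example.com/topics/top-picks', '/pickup/6512345')
#     # -> 'https://news.example.com/pickup/6512345'
#     urljoin('https://news.example.com/topics/top-picks', '?page=2')
#     # -> 'https://news.example.com/topics/top-picks?page=2'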

def main():
    next_url = 'https://n...content-available-to-author-only...o.jp/topics/top-picks'
    topic_urls = []
    # follow the "next" link until the last listing page is reached
    while next_url is not None:
        print(next_url)
        urls = get_urls(next_url)
        topic_urls.extend(urls['topic'])
        next_url = urls['next']
    print(topic_urls)


if __name__ == '__main__':
    main()
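
# Optional hardening sketch (an assumption, not part of the original script):
# some servers reject requests that lack a User-Agent header or stall without a
# timeout. urlopen() also accepts a Request object and a timeout, so the fetch
# inside get_urls() could be written as:
#
#     from urllib.request import Request
#
#     req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
#     html = urlopen(req, timeout=10).read()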