fork download
  1. # A resumable Safari Books Online Video downloader
  2. # Main reference: https://m...content-available-to-author-only...b.io/tools/2017/02/02/safari-downloader.html
  3.  
  4. from bs4 import BeautifulSoup
  5. import requests
  6. import os
  7. import re
  8. import subprocess
  9. import unicodedata
  10. import string
  11.  
  12. import config
  13. # Create a config.py file with the following content:
  14. # class Config:
  15. # URL = 'https://w...content-available-to-author-only...e.com/library/view/strata-data-conference/9781491985373/'
  16. # DOMAIN = 'https://w...content-available-to-author-only...e.com'
  17. # OUTPUT_FOLDER = 'D:\\Strata Data Conference 2017 Singapore'
  18. # USERNAME = 'your_email_address'
  19. # PASSWORD = 'your_password'
  20. # DOWNLOADER = './youtube-dl.exe' # Please download from https://g...content-available-to-author-only...b.com/rg3/youtube-dl
  21.  
  22. class SafariDownloader:
  23.  
  24. def slugify(self, value):
  25. """
  26. Normalizes string, converts to lowercase, removes non-alpha characters,
  27. and converts spaces to hyphens.
  28. """
  29.  
  30. value = re.sub('[^\w\s-]', '', value).strip().lower()
  31. value = re.sub('[-\s]+', '-', value)
  32. return value
  33.  
  34. def __init__(self, url, output_folder, username, password, domain='https://w...content-available-to-author-only...e.com', downloader_path='./youtube-dl.exe'):
  35. self.output_folder = output_folder
  36. self.username = username
  37. self.password = password
  38. self.domain = domain
  39. self.downloader_path = downloader_path
  40.  
  41. req = requests.get(url)
  42. soup = BeautifulSoup(req.text, 'html.parser')
  43. self.topics = soup.find_all('li', class_='toc-level-1') # top-level topic titles
  44. # Update youtube-dl first
  45. subprocess.run([self.downloader_path, "-U"])
  46.  
  47. def validify(self, filename):
  48. valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
  49. valid_chars = frozenset(valid_chars)
  50. # The unicodedata.normalize call replaces accented characters with the unaccented equivalent,
  51. # which is better than simply stripping them out. After that all disallowed characters are removed.
  52. cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ascii', 'ignore').decode('ascii')
  53. return ''.join(c for c in cleaned_filename if c in valid_chars)
  54.  
  55. def download(self):
  56. i = 0
  57. for topic in self.topics:
  58. i = i + 1
  59. topic_name = '{:03d} - {}'.format(i, self.slugify(topic.a.text))
  60. # Creating folder to put the videos in
  61. save_folder = '{}/{}'.format(self.output_folder, topic_name)
  62. os.makedirs(save_folder, exist_ok=True)
  63. # You can choose to skip these topic_name, comment these three lines if you do not want to skip any
  64. if topic_name in ('Keynotes', 'Strata Business Summit', 'Sponsored'):
  65. print("Skipping {}...".format(topic_name))
  66. continue
  67. for index, video in enumerate(topic.ol.find_all('a')):
  68. video_name = '{:03d} - {}'.format(index + 1, video.text)
  69. video_name = self.slugify(video_name)
  70. video_name = self.validify(video_name)
  71. video_url = video.get('href')
  72. video_out = '{}/{}.mp4'.format(save_folder, video_name)
  73. # Check if file already exists
  74. if os.path.isfile(video_out):
  75. print("File {} already exists! Skipping...".format(video_out))
  76. continue
  77. print("Downloading {} ...".format(video_name))
  78. print("Link {} ...".format(video_url))
  79. subprocess.run([self.downloader_path, "-u", self.username, "-p", self.password, "--verbose", "--output", video_out, video_url])
  80.  
  81. if __name__ == '__main__':
  82. app_config = config.Config
  83. downloader = SafariDownloader(url=app_config.URL, output_folder=app_config.OUTPUT_FOLDER,
  84. username=app_config.USERNAME, password=app_config.PASSWORD,
  85. domain=app_config.DOMAIN, downloader_path=app_config.DOWNLOADER)
  86. downloader.download()
Runtime error #stdin #stdout #stderr 0.16s 26808KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 5, in <module>
ImportError: No module named 'requests'