fork download
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4. import os
  5. import re
  6. import csv
  7. from math import *
  8.  
  9. def build_url(i, num_page):
  10. try:
  11. if (i == '2A' or i == '2B'):
  12. url = 'https://w...content-available-to-author-only...v.fr/annuaire-ehpad-en-hebergement-permanent/{0}/0?page={1}'.format(i, num_page)
  13. elif (i < 10):
  14. url = 'https://w...content-available-to-author-only...v.fr/annuaire-ehpad-en-hebergement-permanent/0{0}/0?page={1}'.format(i, num_page)
  15. else:
  16. url = 'https://w...content-available-to-author-only...v.fr/annuaire-ehpad-en-hebergement-permanent/{0}/0?page={1}'.format(i, num_page)
  17. return(url)
  18. except:
  19. return None
  20.  
  21. def find_nb_results(url):
  22. try:
  23. headers = {"User-Agent":"Mozilla/5.0"}
  24. response = requests.get(url, headers=headers, timeout=5)
  25. soup = bs4.BeautifulSoup(response.text, 'lxml')
  26. target = (soup.find('h2', {'id': "cnsa-search-total-phone"}).text)
  27. target = ''.join(c for c in target if c.isdigit())
  28. return(target)
  29. except:
  30. return None
  31.  
  32. def find_ehpad(url, nb):
  33. try:
  34. headers = {"User-Agent":"Mozilla/5.0"}
  35. response = requests.get(url, headers=headers, timeout=5)
  36. soup = bs4.BeautifulSoup(response.text, 'lxml')
  37. target = (soup.findAll('div', {'class': "cnsa_results-item-inside"})[nb])
  38. name = target.find('h3', {'class': "cnsa_results-tags1"}).text
  39. price = target.find('div', {'class': "clearfix"}).find('div', {'class': "prix"}).find('strong').text
  40. return(name, price)
  41. except:
  42. # Gère le cas des EHPAD qui ne mentionnent pas le prix requis pour un hébergement en chambre seule
  43. headers = {"User-Agent":"Mozilla/5.0"}
  44. response = requests.get(url, headers=headers, timeout=5)
  45. soup = bs4.BeautifulSoup(response.text, 'lxml')
  46. target = (soup.findAll('div', {'class': "cnsa_results-item-inside"})[nb])
  47. name = target.find('h3', {'class': "cnsa_results-tags1"}).text
  48. return (name, 'non indiqué')
  49.  
  50. def find_results(i):
  51. ct = 0
  52. url = build_url(i, 0)
  53. nb_results = int(find_nb_results(url))
  54. nb_page_by_dep = ceil(nb_results/10) - 1
  55. while (ct <= nb_page_by_dep):
  56. div = 0
  57. url_in_progress = build_url(i, ct)
  58. ct = ct + 1
  59. if (ct < nb_page_by_dep):
  60. while(div < 10):
  61. name, price = find_ehpad(url_in_progress, div)
  62. div = div + 1
  63. else:
  64. total_div = nb_results - nb_page_by_dep * 10
  65. while(div < total_div):
  66. name, price = find_ehpad(url_in_progress, div)
  67. div = div + 1
  68.  
  69. # Recherche des résultats pour les départements de France métropolitaine
  70. i = 1
  71. while (i <= 95):
  72. find_results(i)
  73. print(i)
  74. i = i + 1
  75. break
  76. # Recherche des résultats pour les départements d'Outre-Mer.
  77. i = 971
  78. while (i <= 976):
  79. find_results(i)
  80. print(i)
  81. i = i + 1
  82. # Recherche des résultats pour la Corse
  83. find_results('2A')
  84. find_results('2B')
  85.  
Runtime error #stdin #stdout #stderr 0.33s 34236KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 2, in <module>
ModuleNotFoundError: No module named 'bs4'