import requests
import bs4
import pandas as pd
import os
import re
import csv
from math import *
def build_url(i, num_page):
    """Build the directory URL for one department and one result page.

    Args:
        i: Department code. Either the string '2A' or '2B', or a number;
            numbers below 10 are prefixed with a single '0' ("01".."09").
        num_page: Value substituted into the ``page`` query parameter.

    Returns:
        The URL as a string, or None when ``i`` is neither '2A'/'2B' nor
        comparable with an integer (e.g. another string, or None).
    """
    template = 'https://w...content-available-to-author-only...v.fr/annuaire-ehpad-en-hebergement-permanent/{0}/0?page={1}'
    if i in ('2A', '2B'):
        code = i
    else:
        try:
            # Single-digit department numbers are zero-padded in the URL.
            code = '0{0}'.format(i) if i < 10 else i
        except TypeError:
            # `i < 10` is not defined for this kind of value: no URL.
            return None
    return template.format(code, num_page)
def find_nb_results(url):
    """Fetch ``url`` and extract the digits of the result-count heading.

    The heading is the ``<h2>`` element whose id is
    ``cnsa-search-total-phone``; every digit character of its text is kept,
    in order, and everything else is dropped.

    Args:
        url: Address of the page to download.

    Returns:
        The digits as a string (empty when the heading text contains no
        digit), or None when the request fails or the heading is absent.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=5)
    except requests.RequestException:
        # Covers timeouts, connection errors and unusable URLs (e.g. None).
        return None
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    heading = soup.find('h2', {'id': "cnsa-search-total-phone"})
    if heading is None:
        return None
    return ''.join(c for c in heading.text if c.isdigit())
def find_ehpad(url, nb):
    """Return the name and price of the ``nb``-th result listed on ``url``.

    The page is downloaded once. Results are the ``<div>`` elements with
    class ``cnsa_results-item-inside``; the name is read from the
    ``<h3 class="cnsa_results-tags1">`` inside the selected result and the
    price from ``div.clearfix > div.prix > strong``.

    Args:
        url: Address of the result page.
        nb: Zero-based index of the result on that page.

    Returns:
        A ``(name, price)`` tuple of strings. ``price`` is 'non indiqué'
        when the result has no price element.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
        IndexError: If the page has no result at index ``nb``.
        AttributeError: If the selected result has no name heading.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=5)
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    target = soup.find_all('div', {'class': "cnsa_results-item-inside"})[nb]
    name = target.find('h3', {'class': "cnsa_results-tags1"}).text
    try:
        price = target.find('div', {'class': "clearfix"}).find('div', {'class': "prix"}).find('strong').text
    except AttributeError:
        # Handles results that do not state a price: one of the nested
        # lookups returned None.
        price = 'non indiqué'
    return (name, price)
def find_results(i):
    """Write one CSV row per result of department ``i``.

    The total number of results is read from the department's first page,
    then every page is visited in order and each result is written through
    the module-level ``writer`` as ``{'name', 'price', 'num_dep'}``.

    Args:
        i: Department code, passed unchanged to ``build_url`` and stored in
            the ``num_dep`` column.

    Raises:
        TypeError, ValueError: If ``find_nb_results`` does not return a
            string of digits (``int()`` fails on None or on '').
    """
    per_page = 10  # the pagination arithmetic assumes 10 results per page
    nb_results = int(find_nb_results(build_url(i, 0)))
    nb_pages = ceil(nb_results / per_page)
    for page in range(nb_pages):
        url_in_progress = build_url(i, page)
        # Every page is full except possibly the last one.
        on_this_page = min(per_page, nb_results - page * per_page)
        for div in range(on_this_page):
            # NOTE: find_ehpad downloads the page again on every call.
            name, price = find_ehpad(url_in_progress, div)
            writer.writerow({'name': name, 'price': price, 'num_dep': i})
# Script body: scrape every department and store the results in a CSV file.
with open('liste-EHPAD-results.csv', 'w', newline='') as csvfile:
    fieldnames = ['name', 'price', 'num_dep']
    # `writer` must stay a module-level name: find_results() writes its rows
    # through it.
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Results for the departments of metropolitan France.
    for i in range(1, 96):
        if i == 20:
            # NOTE(review): Corsica is requested as '2A' and '2B' below, so
            # number 20 is skipped here - confirm the site has no "/20/" page.
            continue
        find_results(i)
        print(i)  # progress indicator
    # Results for the overseas departments.
    for i in range(971, 977):
        find_results(i)
        print(i)  # progress indicator
    # Results for Corsica.
    find_results('2A')
    find_results('2B')
aW1wb3J0IHJlcXVlc3RzCmltcG9ydCBiczQKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgb3MKaW1wb3J0IHJlCmltcG9ydCBjc3YKZnJvbSBtYXRoIGltcG9ydCAqCgpkZWYgYnVpbGRfdXJsKGksIG51bV9wYWdlKToKICAgIHRyeToKICAgICAgICBpZiAoaSA9PSAnMkEnIG9yIGkgPT0gJzJCJyk6CiAgICAgICAgICAgIHVybCA9ICdodHRwczovL3cuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLnYuZnIvYW5udWFpcmUtZWhwYWQtZW4taGViZXJnZW1lbnQtcGVybWFuZW50L3swfS8wP3BhZ2U9ezF9Jy5mb3JtYXQoaSwgbnVtX3BhZ2UpCiAgICAgICAgZWxpZiAoaSA8IDEwKToKICAgICAgICAgICAgdXJsID0gJ2h0dHBzOi8vdy4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4udi5mci9hbm51YWlyZS1laHBhZC1lbi1oZWJlcmdlbWVudC1wZXJtYW5lbnQvMHswfS8wP3BhZ2U9ezF9Jy5mb3JtYXQoaSwgbnVtX3BhZ2UpCiAgICAgICAgZWxzZToKICAgICAgICAgICAgdXJsID0gJ2h0dHBzOi8vdy4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4udi5mci9hbm51YWlyZS1laHBhZC1lbi1oZWJlcmdlbWVudC1wZXJtYW5lbnQvezB9LzA/cGFnZT17MX0nLmZvcm1hdChpLCBudW1fcGFnZSkKICAgICAgICByZXR1cm4odXJsKQogICAgZXhjZXB0OgogICAgICAgIHJldHVybiBOb25lCgpkZWYgZmluZF9uYl9yZXN1bHRzKHVybCk6CiAgICB0cnk6CiAgICAgICAgaGVhZGVycyA9IHsiVXNlci1BZ2VudCI6Ik1vemlsbGEvNS4wIn0KICAgICAgICByZXNwb25zZSA9IHJlcXVlc3RzLmdldCh1cmwsIGhlYWRlcnM9aGVhZGVycywgdGltZW91dD01KQogICAgICAgIHNvdXAgPSBiczQuQmVhdXRpZnVsU291cChyZXNwb25zZS50ZXh0LCAnbHhtbCcpCiAgICAgICAgdGFyZ2V0ID0gKHNvdXAuZmluZCgnaDInLCB7J2lkJzogImNuc2Etc2VhcmNoLXRvdGFsLXBob25lIn0pLnRleHQpCiAgICAgICAgdGFyZ2V0ID0gJycuam9pbihjIGZvciBjIGluIHRhcmdldCBpZiBjLmlzZGlnaXQoKSkgIAogICAgICAgIHJldHVybih0YXJnZXQpCiAgICBleGNlcHQ6CiAgICAgICAgcmV0dXJuIE5vbmUKCmRlZiBmaW5kX2VocGFkKHVybCwgbmIpOgogICAgdHJ5OgogICAgICAgIGhlYWRlcnMgPSB7IlVzZXItQWdlbnQiOiJNb3ppbGxhLzUuMCJ9CiAgICAgICAgcmVzcG9uc2UgPSByZXF1ZXN0cy5nZXQodXJsLCBoZWFkZXJzPWhlYWRlcnMsIHRpbWVvdXQ9NSkKICAgICAgICBzb3VwID0gYnM0LkJlYXV0aWZ1bFNvdXAocmVzcG9uc2UudGV4dCwgJ2x4bWwnKQogICAgICAgIHRhcmdldCA9IChzb3VwLmZpbmRBbGwoJ2RpdicsIHsnY2xhc3MnOiAiY25zYV9yZXN1bHRzLWl0ZW0taW5zaWRlIn0pW25iXSkKICAgICAgICBuYW1lID0gdGFyZ2V0LmZpbmQoJ2gzJywgeydjbGFzcyc6ICJjbnNhX3Jlc3VsdHMtdGFnczEifSkudGV4dAogICAgICAgIHByaWNlID0gdGFyZ2V0LmZpbmQoJ2RpdicsIHsnY2xhc3MnOiAi
Y2xlYXJmaXgifSkuZmluZCgnZGl2JywgeydjbGFzcyc6ICJwcml4In0pLmZpbmQoJ3N0cm9uZycpLnRleHQKICAgICAgICByZXR1cm4obmFtZSwgcHJpY2UpCiAgICBleGNlcHQ6CiAgICAgICAgIyBHw6hyZSBsZSBjYXMgZGVzIEVIUEFEIHF1aSBuZSBtZW50aW9ubmVudCBwYXMgbGUgcHJpeCByZXF1aXMgcG91ciB1biBow6liZXJnZW1lbnQgZW4gY2hhbWJyZSBzZXVsZQogICAgICAgIGhlYWRlcnMgPSB7IlVzZXItQWdlbnQiOiJNb3ppbGxhLzUuMCJ9CiAgICAgICAgcmVzcG9uc2UgPSByZXF1ZXN0cy5nZXQodXJsLCBoZWFkZXJzPWhlYWRlcnMsIHRpbWVvdXQ9NSkKICAgICAgICBzb3VwID0gYnM0LkJlYXV0aWZ1bFNvdXAocmVzcG9uc2UudGV4dCwgJ2x4bWwnKQogICAgICAgIHRhcmdldCA9IChzb3VwLmZpbmRBbGwoJ2RpdicsIHsnY2xhc3MnOiAiY25zYV9yZXN1bHRzLWl0ZW0taW5zaWRlIn0pW25iXSkKICAgICAgICBuYW1lID0gdGFyZ2V0LmZpbmQoJ2gzJywgeydjbGFzcyc6ICJjbnNhX3Jlc3VsdHMtdGFnczEifSkudGV4dAogICAgICAgIHJldHVybiAobmFtZSwgJ25vbiBpbmRpcXXDqScpCgpkZWYgZmluZF9yZXN1bHRzKGkpOgogICAgY3QgPSAwCiAgICB1cmwgPSBidWlsZF91cmwoaSwgMCkKICAgIG5iX3Jlc3VsdHMgPSBpbnQoZmluZF9uYl9yZXN1bHRzKHVybCkpCiAgICBuYl9wYWdlX2J5X2RlcCA9IGNlaWwobmJfcmVzdWx0cy8xMCkgLSAxCiAgICB3aGlsZSAoY3QgPD0gbmJfcGFnZV9ieV9kZXApOgogICAgICAgIGRpdiA9IDAKICAgICAgICB1cmxfaW5fcHJvZ3Jlc3MgPSBidWlsZF91cmwoaSwgY3QpCiAgICAgICAgY3QgPSBjdCArIDEKICAgICAgICBpZiAoY3QgPCBuYl9wYWdlX2J5X2RlcCk6IAogICAgICAgICAgICB3aGlsZShkaXYgPCAxMCk6CiAgICAgICAgICAgICAgICBuYW1lLCBwcmljZSA9IGZpbmRfZWhwYWQodXJsX2luX3Byb2dyZXNzLCBkaXYpCiAgICAgICAgICAgICAgICBkaXYgPSBkaXYgKyAxCiAgICAgICAgICAgICAgICB3cml0ZXIud3JpdGVyb3coeyduYW1lJzogbmFtZSwgJ3ByaWNlJzogcHJpY2UsICdudW1fZGVwJzogaX0pIAogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHRvdGFsX2RpdiA9IG5iX3Jlc3VsdHMgLSBuYl9wYWdlX2J5X2RlcCAqIDEwCiAgICAgICAgICAgIHdoaWxlKGRpdiA8IHRvdGFsX2Rpdik6CiAgICAgICAgICAgICAgICBuYW1lLCBwcmljZSA9IGZpbmRfZWhwYWQodXJsX2luX3Byb2dyZXNzLCBkaXYpCiAgICAgICAgICAgICAgICBkaXYgPSBkaXYgKyAxCiAgICAgICAgICAgICAgICB3cml0ZXIud3JpdGVyb3coeyduYW1lJzogbmFtZSwgJ3ByaWNlJzogcHJpY2UsICdudW1fZGVwJzogaX0pIAoKd2l0aCBvcGVuKCdsaXN0ZS1FSFBBRC1yZXN1bHRzLmNzdicsICd3JywgbmV3bGluZT0nJykgYXMgY3N2ZmlsZToKICAgIGZpZWxkbmFtZXMgPSBbJ25hbWUnLCAncHJpY2UnLCAnbnVtX2RlcCddCiAgICB3cml0ZXIgPSBjc3YuRGljdFdyaXRlcihjc3ZmaWxlLCBmaWVs
ZG5hbWVzPWZpZWxkbmFtZXMpCiAgICB3cml0ZXIud3JpdGVoZWFkZXIoKQogICAgIyBSZWNoZXJjaGUgZGVzIHLDqXN1bHRhdHMgcG91ciBsZXMgZMOpcGFydGVtZW50cyBkZSBGcmFuY2UgbcOpdHJvcG9saXRhaW5lCiAgICBpID0gMQogICAgd2hpbGUgKGkgPD0gOTUpOgogICAgICAgIGZpbmRfcmVzdWx0cyhpKQogICAgICAgIHByaW50KGkpCiAgICAgICAgaSA9IGkgKyAxCiAgICAgICAgYnJlYWsKICAgICMgUmVjaGVyY2hlIGRlcyByw6lzdWx0YXRzIHBvdXIgbGVzIGTDqXBhcnRlbWVudHMgZCdPdXRyZS1NZXIuCiAgICBpID0gOTcxCiAgICB3aGlsZSAoaSA8PSA5NzYpOgogICAgICAgIGZpbmRfcmVzdWx0cyhpKQogICAgICAgIHByaW50KGkpCiAgICAgICAgaSA9IGkgKyAxCiAgICAjIFJlY2hlcmNoZSBkZXMgcsOpc3VsdGF0cyBwb3VyIGxhIENvcnNlCiAgICBmaW5kX3Jlc3VsdHMoJzJBJykKICAgIGZpbmRfcmVzdWx0cygnMkInKQ==