import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
import re
import csv
from math import *
def build_url(i, num_page):
    """Build the directory URL for one results page of a department.

    Args:
        i: Department identifier. The Corsican codes ``'2A'`` and ``'2B'``
            are used as-is; a number below 10 is prefixed with a single
            ``0`` (``1`` -> ``01``); any other number is used unchanged.
        num_page: Page index placed in the ``page`` query parameter.

    Returns:
        The URL as a string, or ``None`` when ``i`` is neither a Corsican
        code nor comparable with an integer (e.g. an unexpected string).
    """
    template = 'https://w...content-available-to-author-only...v.fr/annuaire-ehpad-en-hebergement-permanent/{0}/0?page={1}'
    if i == '2A' or i == '2B':
        code = i
    else:
        try:
            # Single-digit department numbers are zero-padded in the URL path.
            code = '0{0}'.format(i) if i < 10 else i
        except TypeError:
            # Only the comparison can fail here (non-numeric identifier);
            # keep the historical "return None" contract for that case.
            return None
    return template.format(code, num_page)
def find_nb_results(url):
    """Fetch a results page and return the result count displayed on it.

    Args:
        url: Address of a directory results page.

    Returns:
        The digits found in the text of the ``h2`` element whose id is
        ``cnsa-search-total-phone``, joined into a single string, or
        ``None`` when the request fails or that element is absent.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=5)
    except requests.RequestException:
        return None
    # The class is imported directly (``from bs4 import BeautifulSoup``), so
    # it has to be called by that name: ``bs4.BeautifulSoup`` raised a
    # NameError that the former bare ``except`` turned into a silent ``None``.
    soup = BeautifulSoup(response.text, 'lxml')
    heading = soup.find('h2', {'id': "cnsa-search-total-phone"})
    if heading is None:
        return None
    # Keep only the digits of the heading text.
    return ''.join(c for c in heading.text if c.isdigit())
def find_ehpad(url, nb):
    """Return the name and displayed price of one EHPAD on a results page.

    Args:
        url: Address of a directory results page.
        nb: Zero-based index of the entry among the ``div`` elements of
            class ``cnsa_results-item-inside`` found on the page.

    Returns:
        A ``(name, price)`` tuple of strings. ``price`` is the literal
        ``'non indiqué'`` when the entry carries no price markup.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
        IndexError: If the page holds fewer than ``nb + 1`` entries.
        AttributeError: If the entry has no name heading.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=5)
    # Use the directly imported class; ``bs4.BeautifulSoup`` was an undefined
    # name and made every call fail with NameError.
    soup = BeautifulSoup(response.text, 'lxml')
    entry = soup.find_all('div', {'class': "cnsa_results-item-inside"})[nb]
    name = entry.find('h3', {'class': "cnsa_results-tags1"}).text
    try:
        price = (
            entry.find('div', {'class': "clearfix"})
            .find('div', {'class': "prix"})
            .find('strong')
            .text
        )
    except AttributeError:
        # Handles EHPADs that do not list the price for a single-room stay:
        # one of the lookups above returned None. The page is parsed only
        # once, so no second download is needed for this fallback.
        price = 'non indiqué'
    return (name, price)
def find_results(i):
    """Collect the name and price of every EHPAD listed for a department.

    The total number of results is read from the first page, then each
    results page is visited and every entry on it is looked up with
    ``find_ehpad``.

    Args:
        i: Department identifier, passed through to ``build_url``.

    Returns:
        A list of ``(name, price)`` tuples in page order. The list is empty
        when the result count could not be read.
    """
    page_size = 10  # number of entries per results page
    raw_count = find_nb_results(build_url(i, 0))
    # ``find_nb_results`` yields None (or an empty string) when the count is
    # unavailable; treat that as "no results" instead of crashing in int().
    nb_results = int(raw_count) if raw_count else 0
    nb_pages = (nb_results + page_size - 1) // page_size

    results = []
    for page in range(nb_pages):
        page_url = build_url(i, page)
        # Every page is full except possibly the last one. The previous
        # version tested the page counter after incrementing it, so the
        # second-to-last page was wrongly handled as the partial last page.
        entries_on_page = min(page_size, nb_results - page * page_size)
        for index in range(entries_on_page):
            results.append(find_ehpad(page_url, index))
    return results
# Fetch the results for the departments of metropolitan France.
# (A stray unconditional ``break`` used to stop this loop after department 1.)
for i in range(1, 96):
    find_results(i)
    print(i)
# Fetch the results for the overseas departments.
for i in range(971, 977):
    find_results(i)
    print(i)
# Fetch the results for Corsica.
find_results('2A')
find_results('2B')
aW1wb3J0IHJlcXVlc3RzCmZyb20gYnM0IGltcG9ydCBCZWF1dGlmdWxTb3VwCmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IG9zCmltcG9ydCByZQppbXBvcnQgY3N2CmZyb20gbWF0aCBpbXBvcnQgKgoKZGVmIGJ1aWxkX3VybChpLCBudW1fcGFnZSk6CiAgICB0cnk6CiAgICAgICAgaWYgKGkgPT0gJzJBJyBvciBpID09ICcyQicpOgogICAgICAgICAgICB1cmwgPSAnaHR0cHM6Ly93Li4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi52LmZyL2FubnVhaXJlLWVocGFkLWVuLWhlYmVyZ2VtZW50LXBlcm1hbmVudC97MH0vMD9wYWdlPXsxfScuZm9ybWF0KGksIG51bV9wYWdlKQogICAgICAgIGVsaWYgKGkgPCAxMCk6CiAgICAgICAgICAgIHVybCA9ICdodHRwczovL3cuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLnYuZnIvYW5udWFpcmUtZWhwYWQtZW4taGViZXJnZW1lbnQtcGVybWFuZW50LzB7MH0vMD9wYWdlPXsxfScuZm9ybWF0KGksIG51bV9wYWdlKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHVybCA9ICdodHRwczovL3cuLi5jb250ZW50LWF2YWlsYWJsZS10by1hdXRob3Itb25seS4uLnYuZnIvYW5udWFpcmUtZWhwYWQtZW4taGViZXJnZW1lbnQtcGVybWFuZW50L3swfS8wP3BhZ2U9ezF9Jy5mb3JtYXQoaSwgbnVtX3BhZ2UpCiAgICAgICAgcmV0dXJuKHVybCkKICAgIGV4Y2VwdDoKICAgICAgICByZXR1cm4gTm9uZQoKZGVmIGZpbmRfbmJfcmVzdWx0cyh1cmwpOgogICAgdHJ5OgogICAgICAgIGhlYWRlcnMgPSB7IlVzZXItQWdlbnQiOiJNb3ppbGxhLzUuMCJ9CiAgICAgICAgcmVzcG9uc2UgPSByZXF1ZXN0cy5nZXQodXJsLCBoZWFkZXJzPWhlYWRlcnMsIHRpbWVvdXQ9NSkKICAgICAgICBzb3VwID0gYnM0LkJlYXV0aWZ1bFNvdXAocmVzcG9uc2UudGV4dCwgJ2x4bWwnKQogICAgICAgIHRhcmdldCA9IChzb3VwLmZpbmQoJ2gyJywgeydpZCc6ICJjbnNhLXNlYXJjaC10b3RhbC1waG9uZSJ9KS50ZXh0KQogICAgICAgIHRhcmdldCA9ICcnLmpvaW4oYyBmb3IgYyBpbiB0YXJnZXQgaWYgYy5pc2RpZ2l0KCkpICAKICAgICAgICByZXR1cm4odGFyZ2V0KQogICAgZXhjZXB0OgogICAgICAgIHJldHVybiBOb25lCgpkZWYgZmluZF9laHBhZCh1cmwsIG5iKToKICAgIHRyeToKICAgICAgICBoZWFkZXJzID0geyJVc2VyLUFnZW50IjoiTW96aWxsYS81LjAifQogICAgICAgIHJlc3BvbnNlID0gcmVxdWVzdHMuZ2V0KHVybCwgaGVhZGVycz1oZWFkZXJzLCB0aW1lb3V0PTUpCiAgICAgICAgc291cCA9IGJzNC5CZWF1dGlmdWxTb3VwKHJlc3BvbnNlLnRleHQsICdseG1sJykKICAgICAgICB0YXJnZXQgPSAoc291cC5maW5kQWxsKCdkaXYnLCB7J2NsYXNzJzogImNuc2FfcmVzdWx0cy1pdGVtLWluc2lkZSJ9KVtuYl0pCiAgICAgICAgbmFtZSA9IHRhcmdldC5maW5kKCdoMycsIHsnY2xhc3MnOiAiY25zYV9yZXN1bHRzLXRhZ3MxIn0pLnRleHQKICAgICAgICBwcmljZSA9IHRhcmdldC5maW5k
KCdkaXYnLCB7J2NsYXNzJzogImNsZWFyZml4In0pLmZpbmQoJ2RpdicsIHsnY2xhc3MnOiAicHJpeCJ9KS5maW5kKCdzdHJvbmcnKS50ZXh0CiAgICAgICAgcmV0dXJuKG5hbWUsIHByaWNlKQogICAgZXhjZXB0OgogICAgICAgICMgR8OocmUgbGUgY2FzIGRlcyBFSFBBRCBxdWkgbmUgbWVudGlvbm5lbnQgcGFzIGxlIHByaXggcmVxdWlzIHBvdXIgdW4gaMOpYmVyZ2VtZW50IGVuIGNoYW1icmUgc2V1bGUKICAgICAgICBoZWFkZXJzID0geyJVc2VyLUFnZW50IjoiTW96aWxsYS81LjAifQogICAgICAgIHJlc3BvbnNlID0gcmVxdWVzdHMuZ2V0KHVybCwgaGVhZGVycz1oZWFkZXJzLCB0aW1lb3V0PTUpCiAgICAgICAgc291cCA9IGJzNC5CZWF1dGlmdWxTb3VwKHJlc3BvbnNlLnRleHQsICdseG1sJykKICAgICAgICB0YXJnZXQgPSAoc291cC5maW5kQWxsKCdkaXYnLCB7J2NsYXNzJzogImNuc2FfcmVzdWx0cy1pdGVtLWluc2lkZSJ9KVtuYl0pCiAgICAgICAgbmFtZSA9IHRhcmdldC5maW5kKCdoMycsIHsnY2xhc3MnOiAiY25zYV9yZXN1bHRzLXRhZ3MxIn0pLnRleHQKICAgICAgICByZXR1cm4gKG5hbWUsICdub24gaW5kaXF1w6knKQoKZGVmIGZpbmRfcmVzdWx0cyhpKToKICAgIGN0ID0gMAogICAgdXJsID0gYnVpbGRfdXJsKGksIDApCiAgICBuYl9yZXN1bHRzID0gaW50KGZpbmRfbmJfcmVzdWx0cyh1cmwpKQogICAgbmJfcGFnZV9ieV9kZXAgPSBjZWlsKG5iX3Jlc3VsdHMvMTApIC0gMQogICAgd2hpbGUgKGN0IDw9IG5iX3BhZ2VfYnlfZGVwKToKICAgICAgICBkaXYgPSAwCiAgICAgICAgdXJsX2luX3Byb2dyZXNzID0gYnVpbGRfdXJsKGksIGN0KQogICAgICAgIGN0ID0gY3QgKyAxCiAgICAgICAgaWYgKGN0IDwgbmJfcGFnZV9ieV9kZXApOiAKICAgICAgICAgICAgd2hpbGUoZGl2IDwgMTApOgogICAgICAgICAgICAgICAgbmFtZSwgcHJpY2UgPSBmaW5kX2VocGFkKHVybF9pbl9wcm9ncmVzcywgZGl2KQogICAgICAgICAgICAgICAgZGl2ID0gZGl2ICsgMQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHRvdGFsX2RpdiA9IG5iX3Jlc3VsdHMgLSBuYl9wYWdlX2J5X2RlcCAqIDEwCiAgICAgICAgICAgIHdoaWxlKGRpdiA8IHRvdGFsX2Rpdik6CiAgICAgICAgICAgICAgICBuYW1lLCBwcmljZSA9IGZpbmRfZWhwYWQodXJsX2luX3Byb2dyZXNzLCBkaXYpCiAgICAgICAgICAgICAgICBkaXYgPSBkaXYgKyAxCgogICAgIyBSZWNoZXJjaGUgZGVzIHLDqXN1bHRhdHMgcG91ciBsZXMgZMOpcGFydGVtZW50cyBkZSBGcmFuY2UgbcOpdHJvcG9saXRhaW5lCmkgPSAxCndoaWxlIChpIDw9IDk1KToKICAgIGZpbmRfcmVzdWx0cyhpKQogICAgcHJpbnQoaSkKICAgIGkgPSBpICsgMQogICAgYnJlYWsKIyBSZWNoZXJjaGUgZGVzIHLDqXN1bHRhdHMgcG91ciBsZXMgZMOpcGFydGVtZW50cyBkJ091dHJlLU1lci4KaSA9IDk3MQp3aGlsZSAoaSA8PSA5NzYpOgogICAgZmluZF9yZXN1bHRzKGkpCiAgICBwcmludChpKQogICAgaSA9IGkg
KyAxCiMgUmVjaGVyY2hlIGRlcyByw6lzdWx0YXRzIHBvdXIgbGEgQ29yc2UKZmluZF9yZXN1bHRzKCcyQScpCmZpbmRfcmVzdWx0cygnMkInKQo=