Extract graceful URL request handling into a separate function
This commit is contained in:
parent
d47627643f
commit
6ef8fcc993
1 changed file with 12 additions and 12 deletions
24
crawler.py
24
crawler.py
|
|
@ -184,12 +184,7 @@ def get_bio(url, name, sleep_for):
|
||||||
name, party = name
|
name, party = name
|
||||||
name = name.split(", ")
|
name = name.split(", ")
|
||||||
print(f"Getting {url} for {name[1]} {name[0]}")
|
print(f"Getting {url} for {name[1]} {name[0]}")
|
||||||
for _ in range(5):
|
response = request_handle_rate_limit(url)
|
||||||
try:
|
|
||||||
response = requests.get(url)
|
|
||||||
except:
|
|
||||||
print("Rate limit! waiting 5min")
|
|
||||||
sleep(300)
|
|
||||||
soup = BeautifulSoup(response.content)
|
soup = BeautifulSoup(response.content)
|
||||||
cv = soup.find(class_="m-biography__biography").text.strip()
|
cv = soup.find(class_="m-biography__biography").text.strip()
|
||||||
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
||||||
|
|
@ -236,6 +231,16 @@ def get_bio(url, name, sleep_for):
|
||||||
return bio
|
return bio
|
||||||
|
|
||||||
|
|
||||||
|
def request_handle_rate_limit(url):
    """Fetch *url* with ``requests.get``, waiting and retrying on failure.

    Up to five guarded attempts are made. When an attempt raises a
    ``requests`` error, a message is printed and the function sleeps for
    300 seconds before trying again. If all five guarded attempts fail,
    one final unguarded attempt is made so that its exception (if any)
    propagates to the caller instead of being silently dropped.

    Args:
        url: The URL to request.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the final attempt fails.
    """
    for _ in range(5):
        try:
            return requests.get(url)
        # Only request-level failures are retried. A bare ``except:`` would
        # also swallow KeyboardInterrupt/SystemExit, turning Ctrl-C into a
        # five-minute sleep, and would mislabel unrelated bugs as rate limits.
        # NOTE(review): failures are presumed to stem from server-side rate
        # limiting, per the printed message — confirm against the target site.
        except requests.exceptions.RequestException:
            print("Rate limit! waiting 5min")
            sleep(300)
    # Last try outside the guard: let the caller see the real error.
    return requests.get(url)
|
||||||
|
|
||||||
|
|
||||||
def get_disclosures(elem):
|
def get_disclosures(elem):
|
||||||
if not elem:
|
if not elem:
|
||||||
return None
|
return None
|
||||||
|
|
@ -319,12 +324,7 @@ def get_ajax(elem):
|
||||||
for key, value in filters.items()
|
for key, value in filters.items()
|
||||||
]
|
]
|
||||||
url = url + "?" + "&".join(f"{key}={val}" for key, val in sanitized_filters)
|
url = url + "?" + "&".join(f"{key}={val}" for key, val in sanitized_filters)
|
||||||
for _ in range(5):
|
response = request_handle_rate_limit(url)
|
||||||
try:
|
|
||||||
response = requests.get(url)
|
|
||||||
except:
|
|
||||||
print("Rate limit! waiting 5min")
|
|
||||||
sleep(300)
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue