extract graceful url handling into separate function
This commit is contained in:
parent
d47627643f
commit
6ef8fcc993
1 changed files with 12 additions and 12 deletions
24
crawler.py
24
crawler.py
|
|
@ -184,12 +184,7 @@ def get_bio(url, name, sleep_for):
|
|||
name, party = name
|
||||
name = name.split(", ")
|
||||
print(f"Getting {url} for {name[1]} {name[0]}")
|
||||
for _ in range(5):
|
||||
try:
|
||||
response = requests.get(url)
|
||||
except:
|
||||
print("Rate limit! waiting 5min")
|
||||
sleep(300)
|
||||
response = request_handle_rate_limit(url)
|
||||
soup = BeautifulSoup(response.content)
|
||||
cv = soup.find(class_="m-biography__biography").text.strip()
|
||||
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
||||
|
|
@ -236,6 +231,16 @@ def get_bio(url, name, sleep_for):
|
|||
return bio
|
||||
|
||||
|
||||
def request_handle_rate_limit(url, retries=5, wait_seconds=300):
    """Fetch *url* with ``requests.get``, pausing and retrying on failure.

    Up to ``retries`` attempts are made; after each failed attempt a notice
    is printed and the function sleeps for ``wait_seconds``. If every one of
    those attempts fails, one final request is issued outside the retry
    loop so that its exception reaches the caller.

    Args:
        url: Address to request.
        retries: Number of attempts that are followed by a wait on failure.
        wait_seconds: Seconds to sleep after a failed attempt.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the final attempt fails too.
    """
    for _ in range(retries):
        try:
            return requests.get(url)
        # Only request failures are retried. A bare ``except`` would also
        # trap KeyboardInterrupt/SystemExit and turn Ctrl+C into a long sleep.
        # NOTE(review): requests.get does not raise for HTTP error statuses
        # (e.g. 429) by itself, so only transport-level failures land here.
        except requests.exceptions.RequestException:
            print(f"Rate limit! waiting {wait_seconds / 60:g}min")
            sleep(wait_seconds)
    # Last try is deliberately unguarded: let the caller see the real error.
    return requests.get(url)
|
||||
|
||||
|
||||
def get_disclosures(elem):
|
||||
if not elem:
|
||||
return None
|
||||
|
|
@ -319,12 +324,7 @@ def get_ajax(elem):
|
|||
for key, value in filters.items()
|
||||
]
|
||||
url = url + "?" + "&".join(f"{key}={val}" for key, val in sanitized_filters)
|
||||
for _ in range(5):
|
||||
try:
|
||||
response = requests.get(url)
|
||||
except:
|
||||
print("Rate limit! waiting 5min")
|
||||
sleep(300)
|
||||
response = request_handle_rate_limit(url)
|
||||
return response
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue