sort functions alphabetically and handle rate limit more gracefully
This commit is contained in:
parent
3f0130c75b
commit
be2371e0f5
1 changed file with 14 additions and 3 deletions
17
crawler.py
17
crawler.py
|
|
@@ -112,7 +112,7 @@ def funcs_to_str(funcs):
|
|||
out = ""
|
||||
for func in funcs:
|
||||
out += f"\n- {func[0]}"
|
||||
for loc in func[1]:
|
||||
for loc in sorted(func[1]):
|
||||
out += f"\n - {loc}"
|
||||
return out
|
||||
|
||||
|
|
@@ -184,7 +184,12 @@ def get_bio(url, name, sleep_for):
|
|||
name, party = name
|
||||
name = name.split(", ")
|
||||
print(f"Getting {url} for {name[1]} {name[0]}")
|
||||
response = requests.get(url)
|
||||
for _ in range(5):
|
||||
try:
|
||||
response = requests.get(url)
|
||||
except:
|
||||
print("Rate limit! waiting 5min")
|
||||
sleep(300)
|
||||
soup = BeautifulSoup(response.content)
|
||||
cv = soup.find(class_="m-biography__biography").text.strip()
|
||||
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
||||
|
|
@@ -314,7 +319,13 @@ def get_ajax(elem):
|
|||
for key, value in filters.items()
|
||||
]
|
||||
url = url + "?" + "&".join(f"{key}={val}" for key, val in sanitized_filters)
|
||||
return requests.get(url)
|
||||
for _ in range(5):
|
||||
try:
|
||||
response = requests.get(url)
|
||||
except:
|
||||
print("Rate limit! waiting 5min")
|
||||
sleep(300)
|
||||
return response
|
||||
|
||||
|
||||
def common_suffix(strings):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue