diff --git a/crawler.py b/crawler.py
index 46aa103..c31051e 100644
--- a/crawler.py
+++ b/crawler.py
@@ -112,7 +112,7 @@ def funcs_to_str(funcs):
     out = ""
     for func in funcs:
         out += f"\n- {func[0]}"
-        for loc in func[1]:
+        for loc in sorted(func[1]):
             out += f"\n - {loc}"
     return out
 
@@ -184,7 +184,17 @@ def get_bio(url, name, sleep_for):
     name, party = name
     name = name.split(", ")
     print(f"Getting {url} for {name[1]} {name[0]}")
-    response = requests.get(url)
+    # Retry failed requests up to 5 times: stop at the first success and
+    # re-raise the underlying error once the final attempt has failed.
+    for attempt in range(5):
+        try:
+            response = requests.get(url)
+            break
+        except requests.exceptions.RequestException:
+            if attempt == 4:
+                raise
+            print("Rate limit! waiting 5min")
+            sleep(300)
     soup = BeautifulSoup(response.content)
     cv = soup.find(class_="m-biography__biography").text.strip()
     ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
@@ -314,7 +324,18 @@ def get_ajax(elem):
         for key, value in filters.items()
     ]
     url = url + "?" + "&".join(f"{key}={val}" for key, val in sanitized_filters)
-    return requests.get(url)
+    # Retry failed requests up to 5 times: stop at the first success and
+    # re-raise the underlying error once the final attempt has failed.
+    for attempt in range(5):
+        try:
+            response = requests.get(url)
+            break
+        except requests.exceptions.RequestException:
+            if attempt == 4:
+                raise
+            print("Rate limit! waiting 5min")
+            sleep(300)
+    return response
 
 
 def common_suffix(strings):