diff --git a/crawler.py b/crawler.py index d8503cd..624b36f 100644 --- a/crawler.py +++ b/crawler.py @@ -239,12 +239,11 @@ def get_bio(url, name, sleep_for): print(f"Getting {url} for {name[1]} {name[0]}") response = request_handle_rate_limit(url) soup = BeautifulSoup(response.content, features="html.parser") - job_elem = soup.find(class_="m-biography__introInfo").find("span") - if job_elem: - job = job_elem.text - else: - job = None - cv = soup.find(class_="m-biography__biography").text.strip() + intro_info = soup.find(class_="m-biography__introInfo") + job_elem = intro_info.find("span") if intro_info else None + job = job_elem.text if job_elem else None + cv_elem = soup.find(class_="m-biography__biography") + cv = cv_elem.text.strip() if cv_elem else "" ajax_divs = soup.find_all(class_="m-ajaxLoadedContent") speech_div = None vote_div = None @@ -266,9 +265,10 @@ def get_bio(url, name, sleep_for): additional_functions = get_functions(function_divs[1]) else: additional_functions = None + mandate_elem = soup.find(class_="m-biography__subHeading --mandate") mandate = ( - soup.find(class_="m-biography__subHeading --mandate").text, - soup.find(string=re.compile(r"^Wahlkreis \d*:")), + mandate_elem.text if mandate_elem else "", + soup.find(string=re.compile(r"^Wahlkreis \d*:")) or "", ) disclosures = get_disclosures(soup.find(class_="m-biography__infos"))