fix crash when biography page elements are missing
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bc6dcea7e1
commit
6a6c478b43
1 changed file with 8 additions and 8 deletions
16
crawler.py
16
crawler.py
|
|
@@ -239,12 +239,11 @@ def get_bio(url, name, sleep_for):
|
|||
print(f"Getting {url} for {name[1]} {name[0]}")
|
||||
response = request_handle_rate_limit(url)
|
||||
soup = BeautifulSoup(response.content, features="html.parser")
|
||||
job_elem = soup.find(class_="m-biography__introInfo").find("span")
|
||||
if job_elem:
|
||||
job = job_elem.text
|
||||
else:
|
||||
job = None
|
||||
cv = soup.find(class_="m-biography__biography").text.strip()
|
||||
intro_info = soup.find(class_="m-biography__introInfo")
|
||||
job_elem = intro_info.find("span") if intro_info else None
|
||||
job = job_elem.text if job_elem else None
|
||||
cv_elem = soup.find(class_="m-biography__biography")
|
||||
cv = cv_elem.text.strip() if cv_elem else ""
|
||||
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
||||
speech_div = None
|
||||
vote_div = None
|
||||
|
|
@@ -266,9 +265,10 @@ def get_bio(url, name, sleep_for):
|
|||
additional_functions = get_functions(function_divs[1])
|
||||
else:
|
||||
additional_functions = None
|
||||
mandate_elem = soup.find(class_="m-biography__subHeading --mandate")
|
||||
mandate = (
|
||||
soup.find(class_="m-biography__subHeading --mandate").text,
|
||||
soup.find(string=re.compile(r"^Wahlkreis \d*:")),
|
||||
mandate_elem.text if mandate_elem else "",
|
||||
soup.find(string=re.compile(r"^Wahlkreis \d*:")) or "",
|
||||
)
|
||||
disclosures = get_disclosures(soup.find(class_="m-biography__infos"))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue