fix crash when biography page elements are missing
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bc6dcea7e1
commit
6a6c478b43
1 changed file with 8 additions and 8 deletions
16
crawler.py
16
crawler.py
|
|
@@ -239,12 +239,11 @@ def get_bio(url, name, sleep_for):
|
||||||
print(f"Getting {url} for {name[1]} {name[0]}")
|
print(f"Getting {url} for {name[1]} {name[0]}")
|
||||||
response = request_handle_rate_limit(url)
|
response = request_handle_rate_limit(url)
|
||||||
soup = BeautifulSoup(response.content, features="html.parser")
|
soup = BeautifulSoup(response.content, features="html.parser")
|
||||||
job_elem = soup.find(class_="m-biography__introInfo").find("span")
|
intro_info = soup.find(class_="m-biography__introInfo")
|
||||||
if job_elem:
|
job_elem = intro_info.find("span") if intro_info else None
|
||||||
job = job_elem.text
|
job = job_elem.text if job_elem else None
|
||||||
else:
|
cv_elem = soup.find(class_="m-biography__biography")
|
||||||
job = None
|
cv = cv_elem.text.strip() if cv_elem else ""
|
||||||
cv = soup.find(class_="m-biography__biography").text.strip()
|
|
||||||
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
|
||||||
speech_div = None
|
speech_div = None
|
||||||
vote_div = None
|
vote_div = None
|
||||||
|
|
@@ -266,9 +265,10 @@ def get_bio(url, name, sleep_for):
|
||||||
additional_functions = get_functions(function_divs[1])
|
additional_functions = get_functions(function_divs[1])
|
||||||
else:
|
else:
|
||||||
additional_functions = None
|
additional_functions = None
|
||||||
|
mandate_elem = soup.find(class_="m-biography__subHeading --mandate")
|
||||||
mandate = (
|
mandate = (
|
||||||
soup.find(class_="m-biography__subHeading --mandate").text,
|
mandate_elem.text if mandate_elem else "",
|
||||||
soup.find(string=re.compile(r"^Wahlkreis \d*:")),
|
soup.find(string=re.compile(r"^Wahlkreis \d*:")) or "",
|
||||||
)
|
)
|
||||||
disclosures = get_disclosures(soup.find(class_="m-biography__infos"))
|
disclosures = get_disclosures(soup.find(class_="m-biography__infos"))
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue