fix replacement character warnings by using decoded response text

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marco Lents 2026-04-13 22:21:40 +02:00
parent 6a6c478b43
commit 1a80fe1647

View file

@@ -225,7 +225,7 @@ def group_by_party(bios):
def get_links_and_names(): def get_links_and_names():
response = requests.get(BUNDESTAG_URL) response = requests.get(BUNDESTAG_URL)
soup = BeautifulSoup(response.content, features="html.parser") soup = BeautifulSoup(response.text, features="html.parser")
links = [a.get("href") for a in soup.find_all("a")] links = [a.get("href") for a in soup.find_all("a")]
names = [a.text.strip().split("\n\n\n") for a in soup.find_all("a")] names = [a.text.strip().split("\n\n\n") for a in soup.find_all("a")]
@@ -238,7 +238,7 @@ def get_bio(url, name, sleep_for):
party = party.replace("\n\xa0*", " (ausgeschieden)") party = party.replace("\n\xa0*", " (ausgeschieden)")
print(f"Getting {url} for {name[1]} {name[0]}") print(f"Getting {url} for {name[1]} {name[0]}")
response = request_handle_rate_limit(url) response = request_handle_rate_limit(url)
soup = BeautifulSoup(response.content, features="html.parser") soup = BeautifulSoup(response.text, features="html.parser")
intro_info = soup.find(class_="m-biography__introInfo") intro_info = soup.find(class_="m-biography__introInfo")
job_elem = intro_info.find("span") if intro_info else None job_elem = intro_info.find("span") if intro_info else None
job = job_elem.text if job_elem else None job = job_elem.text if job_elem else None
@@ -353,7 +353,7 @@ def get_functions(elem):
def parse_speech(page): def parse_speech(page):
if not page: if not page:
return None return None
soup = BeautifulSoup(page.content, features="html.parser") soup = BeautifulSoup(page.text, features="html.parser")
infos = [s.text.strip() for s in soup.find_all(class_="m-biography__speechTitle")] infos = [s.text.strip() for s in soup.find_all(class_="m-biography__speechTitle")]
titles = [ titles = [
title.text.strip() title.text.strip()
@@ -366,7 +366,7 @@ def parse_speech(page):
def parse_vote(page): def parse_vote(page):
if not page: if not page:
return None return None
soup = BeautifulSoup(page.content, features="html.parser") soup = BeautifulSoup(page.text, features="html.parser")
rows = soup.find_all("tr")[1:] rows = soup.find_all("tr")[1:]
parsed = [] parsed = []
for row in rows: for row in rows: