From 1a80fe16471683123cf9fe059d5366fa36f6c70c Mon Sep 17 00:00:00 2001 From: Marco Lents Date: Mon, 13 Apr 2026 22:21:40 +0200 Subject: [PATCH] fix replacement character warnings by using decoded response text Co-Authored-By: Claude Opus 4.6 (1M context) --- crawler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crawler.py b/crawler.py index 624b36f..3004bf7 100644 --- a/crawler.py +++ b/crawler.py @@ -225,7 +225,7 @@ def group_by_party(bios): def get_links_and_names(): response = requests.get(BUNDESTAG_URL) - soup = BeautifulSoup(response.content, features="html.parser") + soup = BeautifulSoup(response.text, features="html.parser") links = [a.get("href") for a in soup.find_all("a")] names = [a.text.strip().split("\n\n\n") for a in soup.find_all("a")] @@ -238,7 +238,7 @@ def get_bio(url, name, sleep_for): party = party.replace("\n\xa0*", " (ausgeschieden)") print(f"Getting {url} for {name[1]} {name[0]}") response = request_handle_rate_limit(url) - soup = BeautifulSoup(response.content, features="html.parser") + soup = BeautifulSoup(response.text, features="html.parser") intro_info = soup.find(class_="m-biography__introInfo") job_elem = intro_info.find("span") if intro_info else None job = job_elem.text if job_elem else None @@ -353,7 +353,7 @@ def get_functions(elem): def parse_speech(page): if not page: return None - soup = BeautifulSoup(page.content, features="html.parser") + soup = BeautifulSoup(page.text, features="html.parser") infos = [s.text.strip() for s in soup.find_all(class_="m-biography__speechTitle")] titles = [ title.text.strip() @@ -366,7 +366,7 @@ def parse_speech(page): def parse_vote(page): if not page: return None - soup = BeautifulSoup(page.content, features="html.parser") + soup = BeautifulSoup(page.text, features="html.parser") rows = soup.find_all("tr")[1:] parsed = [] for row in rows: