Add the `features="html.parser"` argument to every BeautifulSoup constructor call to silence the parser-guessing warnings

This commit is contained in:
Marco Lents 2025-11-17 17:28:52 +01:00
parent 0fcd725192
commit 026adbc0ea

View file

@@ -220,7 +220,7 @@ def group_by_party(bios):
 def get_links_and_names():
     response = requests.get(BUNDESTAG_URL)
-    soup = BeautifulSoup(response.content)
+    soup = BeautifulSoup(response.content, features="html.parser")
     links = [a.get("href") for a in soup.find_all("a")]
     names = [a.text.strip().split("\n\n\n") for a in soup.find_all("a")]
@@ -232,7 +232,7 @@ def get_bio(url, name, sleep_for):
     name = name.split(", ")
     print(f"Getting {url} for {name[1]} {name[0]}")
     response = request_handle_rate_limit(url)
-    soup = BeautifulSoup(response.content)
+    soup = BeautifulSoup(response.content, features="html.parser")
     job = soup.find(class_="m-biography__introInfo").find("span").text
     cv = soup.find(class_="m-biography__biography").text.strip()
     ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
@@ -343,7 +343,7 @@ def get_functions(elem):
 def parse_speech(page):
     if not page:
         return None
-    soup = BeautifulSoup(page.content)
+    soup = BeautifulSoup(page.content, features="html.parser")
     infos = [s.text.strip() for s in soup.find_all(class_="m-biography__speechTitle")]
     titles = [
         title.text.strip()
@@ -356,7 +356,7 @@ def parse_speech(page):
 def parse_vote(page):
     if not page:
         return None
-    soup = BeautifulSoup(page.content)
+    soup = BeautifulSoup(page.content, features="html.parser")
     rows = soup.find_all("tr")[1:]
     parsed = []
     for row in rows: