Add the `features="html.parser"` argument to every BeautifulSoup constructor call to silence the parser-guessing warnings

This commit is contained in:
Marco Lents 2025-11-17 17:28:52 +01:00
parent 0fcd725192
commit 026adbc0ea

View file

@@ -220,7 +220,7 @@ def group_by_party(bios):
 def get_links_and_names():
     response = requests.get(BUNDESTAG_URL)
-    soup = BeautifulSoup(response.content)
+    soup = BeautifulSoup(response.content, features="html.parser")
     links = [a.get("href") for a in soup.find_all("a")]
     names = [a.text.strip().split("\n\n\n") for a in soup.find_all("a")]
@@ -232,7 +232,7 @@ def get_bio(url, name, sleep_for):
     name = name.split(", ")
     print(f"Getting {url} for {name[1]} {name[0]}")
     response = request_handle_rate_limit(url)
-    soup = BeautifulSoup(response.content)
+    soup = BeautifulSoup(response.content, features="html.parser")
     job = soup.find(class_="m-biography__introInfo").find("span").text
     cv = soup.find(class_="m-biography__biography").text.strip()
     ajax_divs = soup.find_all(class_="m-ajaxLoadedContent")
@@ -343,7 +343,7 @@ def get_functions(elem):
 def parse_speech(page):
     if not page:
         return None
-    soup = BeautifulSoup(page.content)
+    soup = BeautifulSoup(page.content, features="html.parser")
     infos = [s.text.strip() for s in soup.find_all(class_="m-biography__speechTitle")]
     titles = [
         title.text.strip()
@@ -356,7 +356,7 @@ def parse_speech(page):
 def parse_vote(page):
     if not page:
         return None
-    soup = BeautifulSoup(page.content)
+    soup = BeautifulSoup(page.content, features="html.parser")
     rows = soup.find_all("tr")[1:]
     parsed = []
     for row in rows: