clean up: explicit utf-8 encoding, proper exception handling, remove dead code

- Add encoding="utf-8" to all file writes
- Catch requests.RequestException instead of bare except
- Call raise_for_status() so HTTP error responses also raise and are retried
- Use removeprefix/removesuffix instead of lstrip/rstrip, which strip any of the given characters rather than the literal prefix/suffix
- Use makedirs(exist_ok=True) instead of try/except FileExistsError
- Remove unused common_suffix function and commonprefix import

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marco Lents 2026-04-13 22:27:03 +02:00
parent 1a80fe1647
commit 14670538f6

View file

@ -3,7 +3,6 @@ import re
import json
from bs4 import BeautifulSoup
from time import sleep
from os.path import commonprefix
from os import makedirs
from git import Repo
import argparse
@ -167,16 +166,13 @@ def save_individuals(bios, out):
first_letter = rep.name[0][0].upper()
name_str = f"{rep.name[0]} {rep.name[1]}".replace(" ", "_")
dir = f"{out}/Abgeordnete/{first_letter}"
try:
makedirs(dir)
except FileExistsError:
pass
with open(f"{dir}/{name_str}.md", "w") as rep_file:
makedirs(dir, exist_ok=True)
with open(f"{dir}/{name_str}.md", "w", encoding="utf-8") as rep_file:
rep_file.write(str(rep))
def save_raw(bios, out):
with open(f"{out}/raw.json", "w") as raw_file:
with open(f"{out}/raw.json", "w", encoding="utf-8") as raw_file:
json.dump(
[bio.to_dict() for bio in bios], raw_file, indent=2, ensure_ascii=False
)
@ -184,10 +180,7 @@ def save_raw(bios, out):
def save_disclosures(bios, out):
dir = f"{out}/Voep_Angaben"
try:
makedirs(dir)
except FileExistsError:
pass
makedirs(dir, exist_ok=True)
bios_with_discl = [bio for bio in bios if bio.disclosures]
alpha_str = ""
for bio in sorted(bios_with_discl, key=lambda b: b.name):
@ -195,7 +188,7 @@ def save_disclosures(bios, out):
alpha_str += funcs_to_str(bio.disclosures)
alpha_str += "\n"
with open(f"{dir}/Alphabetisch.md", "w") as alpha_file:
with open(f"{dir}/Alphabetisch.md", "w", encoding="utf-8") as alpha_file:
alpha_file.write(alpha_str.strip())
party_str = ""
@ -206,7 +199,7 @@ def save_disclosures(bios, out):
party_str += funcs_to_str(bio.disclosures)
party_str += "\n"
with open(f"{dir}/Nach_Partei.md", "w") as party_file:
with open(f"{dir}/Nach_Partei.md", "w", encoding="utf-8") as party_file:
party_file.write(party_str.strip())
@ -293,9 +286,11 @@ def get_bio(url, name, sleep_for):
def request_handle_rate_limit(url):
for _ in range(5):
try:
return requests.get(url)
except:
print("Rate limit! waiting 5min")
response = requests.get(url)
response.raise_for_status()
return response
except requests.RequestException:
print("Request failed! waiting 5min")
sleep(300)
return requests.get(url)
@ -378,7 +373,7 @@ def parse_vote(page):
def get_ajax(elem):
if not elem:
return None
inner = elem.get("x-data").lstrip("dynamicTemplateOutput(").rstrip(")")
inner = elem.get("x-data").removeprefix("dynamicTemplateOutput(").removesuffix(")")
data = json.loads(inner)
url = BUNDESTAG_BASE_URL + data["endpoint"]
filters = data["filters"]
@ -391,9 +386,5 @@ def get_ajax(elem):
return response
def common_suffix(strings):
return commonprefix([s[::-1] for s in strings])[::-1]
if __name__ == "__main__":
main()