clean up: explicit utf-8 encoding, proper exception handling, remove dead code

- Add encoding="utf-8" to all file writes
- Catch requests.RequestException instead of bare except
- Use raise_for_status() to also retry on HTTP errors
- Use removeprefix/removesuffix instead of lstrip/rstrip (lstrip/rstrip strip any characters from the given set, not the literal prefix/suffix)
- Use makedirs(exist_ok=True)
- Remove unused common_suffix function and commonprefix import

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marco Lents 2026-04-13 22:27:03 +02:00
parent 1a80fe1647
commit 14670538f6

View file

@ -3,7 +3,6 @@ import re
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import sleep from time import sleep
from os.path import commonprefix
from os import makedirs from os import makedirs
from git import Repo from git import Repo
import argparse import argparse
@ -167,16 +166,13 @@ def save_individuals(bios, out):
first_letter = rep.name[0][0].upper() first_letter = rep.name[0][0].upper()
name_str = f"{rep.name[0]} {rep.name[1]}".replace(" ", "_") name_str = f"{rep.name[0]} {rep.name[1]}".replace(" ", "_")
dir = f"{out}/Abgeordnete/{first_letter}" dir = f"{out}/Abgeordnete/{first_letter}"
try: makedirs(dir, exist_ok=True)
makedirs(dir) with open(f"{dir}/{name_str}.md", "w", encoding="utf-8") as rep_file:
except FileExistsError:
pass
with open(f"{dir}/{name_str}.md", "w") as rep_file:
rep_file.write(str(rep)) rep_file.write(str(rep))
def save_raw(bios, out): def save_raw(bios, out):
with open(f"{out}/raw.json", "w") as raw_file: with open(f"{out}/raw.json", "w", encoding="utf-8") as raw_file:
json.dump( json.dump(
[bio.to_dict() for bio in bios], raw_file, indent=2, ensure_ascii=False [bio.to_dict() for bio in bios], raw_file, indent=2, ensure_ascii=False
) )
@ -184,10 +180,7 @@ def save_raw(bios, out):
def save_disclosures(bios, out): def save_disclosures(bios, out):
dir = f"{out}/Voep_Angaben" dir = f"{out}/Voep_Angaben"
try: makedirs(dir, exist_ok=True)
makedirs(dir)
except FileExistsError:
pass
bios_with_discl = [bio for bio in bios if bio.disclosures] bios_with_discl = [bio for bio in bios if bio.disclosures]
alpha_str = "" alpha_str = ""
for bio in sorted(bios_with_discl, key=lambda b: b.name): for bio in sorted(bios_with_discl, key=lambda b: b.name):
@ -195,7 +188,7 @@ def save_disclosures(bios, out):
alpha_str += funcs_to_str(bio.disclosures) alpha_str += funcs_to_str(bio.disclosures)
alpha_str += "\n" alpha_str += "\n"
with open(f"{dir}/Alphabetisch.md", "w") as alpha_file: with open(f"{dir}/Alphabetisch.md", "w", encoding="utf-8") as alpha_file:
alpha_file.write(alpha_str.strip()) alpha_file.write(alpha_str.strip())
party_str = "" party_str = ""
@ -206,7 +199,7 @@ def save_disclosures(bios, out):
party_str += funcs_to_str(bio.disclosures) party_str += funcs_to_str(bio.disclosures)
party_str += "\n" party_str += "\n"
with open(f"{dir}/Nach_Partei.md", "w") as party_file: with open(f"{dir}/Nach_Partei.md", "w", encoding="utf-8") as party_file:
party_file.write(party_str.strip()) party_file.write(party_str.strip())
@ -293,9 +286,11 @@ def get_bio(url, name, sleep_for):
def request_handle_rate_limit(url): def request_handle_rate_limit(url):
for _ in range(5): for _ in range(5):
try: try:
return requests.get(url) response = requests.get(url)
except: response.raise_for_status()
print("Rate limit! waiting 5min") return response
except requests.RequestException:
print("Request failed! waiting 5min")
sleep(300) sleep(300)
return requests.get(url) return requests.get(url)
@ -378,7 +373,7 @@ def parse_vote(page):
def get_ajax(elem): def get_ajax(elem):
if not elem: if not elem:
return None return None
inner = elem.get("x-data").lstrip("dynamicTemplateOutput(").rstrip(")") inner = elem.get("x-data").removeprefix("dynamicTemplateOutput(").removesuffix(")")
data = json.loads(inner) data = json.loads(inner)
url = BUNDESTAG_BASE_URL + data["endpoint"] url = BUNDESTAG_BASE_URL + data["endpoint"]
filters = data["filters"] filters = data["filters"]
@ -391,9 +386,5 @@ def get_ajax(elem):
return response return response
def common_suffix(strings):
return commonprefix([s[::-1] for s in strings])[::-1]
if __name__ == "__main__": if __name__ == "__main__":
main() main()