generate descriptive commit messages by diffing against previous run

Loads the previous raw.json before saving, compares it against the current
crawl, and generates a commit message listing new and departed representatives,
party changes, new disclosures, and the total number of profiles in the crawl.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marco Lents 2026-04-13 22:37:50 +02:00
parent e5a43977c1
commit fb38bb5894

View file

@ -144,6 +144,8 @@ def main():
bios = [get_bio(link, name, sleep_for) for link, name in zip(links, names)] bios = [get_bio(link, name, sleep_for) for link, name in zip(links, names)]
old_bios = load_old_bios(args.out)
if not args.debug: if not args.debug:
save_raw(bios, args.out) save_raw(bios, args.out)
save_individuals(bios, args.out) save_individuals(bios, args.out)
@ -156,12 +158,82 @@ def main():
if repo.git.diff(name_only=True) == "": if repo.git.diff(name_only=True) == "":
return return
message = generate_commit_message(old_bios, bios)
repo.git.add("*") repo.git.add("*")
repo.index.commit(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) repo.index.commit(message)
origin = repo.remote(name="origin") origin = repo.remote(name="origin")
origin.push() origin.push()
def load_old_bios(out):
    """Return the data stored in ``<out>/raw.json`` by a previous run.

    Args:
        out: Directory that holds ``raw.json``.

    Returns:
        The decoded JSON content of the file, or an empty list when the
        file does not exist (e.g. on the very first run).
    """
    raw_path = f"{out}/raw.json"
    try:
        with open(raw_path, encoding="utf-8") as handle:
            previous = json.load(handle)
    except FileNotFoundError:
        # No earlier crawl to compare against.
        previous = []
    return previous
def generate_commit_message(old_bios, new_bios):
    """Build a descriptive commit message by diffing two crawl results.

    Entries are matched by ``tuple(name)``. The message consists of a title
    line ``Aktualisierung <YYYY-MM-DD>`` followed by blank-line separated
    sections for added entries, removed entries, party changes and new
    disclosure items (each section only when non-empty), and always ends
    with the number of entries in ``new_bios``.

    Args:
        old_bios: Entries of the previous run as dicts with a ``"name"`` key
            and optional ``"party"`` and ``"disclosures"`` keys.
        new_bios: Entries of the current run as objects exposing ``name``,
            ``party`` and ``disclosures`` attributes.

    Returns:
        The complete commit message as a single string.

    Note:
        Every disclosure entry ``d`` is expected to carry an iterable of
        hashable items at ``d[1]``; the meaning of ``d[0]`` is not visible
        here (presumably a category label -- confirm against the crawler).
    """
    old_by_name = {tuple(b["name"]): b for b in old_bios}
    new_by_name = {tuple(b.name): b for b in new_bios}

    added = [bio for key, bio in new_by_name.items() if key not in old_by_name]
    removed = [bio for key, bio in old_by_name.items() if key not in new_by_name]

    new_disclosures = []
    party_changes = []
    for bio in new_bios:
        key = tuple(bio.name)
        if key not in old_by_name:
            continue  # brand-new entries are reported under "added" only
        previous = old_by_name[key]

        # Report unseen items in the order they appear in the new data.
        # Iterating a set difference instead would make the line order
        # depend on hash ordering, which differs between interpreter runs.
        seen = {
            item
            for entry in (previous.get("disclosures") or [])
            for item in entry[1]
        }
        for entry in bio.disclosures or []:
            for item in entry[1]:
                if item not in seen:
                    seen.add(item)  # list each new item at most once
                    new_disclosures.append((bio, item))

        old_party = previous.get("party", "")
        if old_party != bio.party:
            party_changes.append((bio, old_party))

    date = datetime.now().strftime("%Y-%m-%d")

    sections = []
    if added:
        sections.append("Neue Abgeordnete:\n" + "\n".join(
            f"- {b.name[1]} {b.name[0]} ({b.party})" for b in added
        ))
    if removed:
        sections.append("Ausgeschieden:\n" + "\n".join(
            f"- {b['name'][1]} {b['name'][0]} ({b.get('party', '')})"
            for b in removed
        ))
    if party_changes:
        sections.append("Parteiwechsel:\n" + "\n".join(
            f"- {b.name[1]} {b.name[0]}: {old} -> {b.party}"
            for b, old in party_changes
        ))
    if new_disclosures:
        sections.append("Neue Veröffentlichungen:\n" + "\n".join(
            f"- {b.name[1]} {b.name[0]} ({b.party}): {item}"
            for b, item in new_disclosures
        ))

    # The closing line counts every entry of the current crawl, not only
    # those that differ from the previous run.
    sections.append(f"{len(new_bios)} Profile aktualisiert")

    title = f"Aktualisierung {date}"
    body = "\n\n".join(sections)
    return f"{title}\n\n{body}"
def save_individuals(bios, out): def save_individuals(bios, out):
for rep in bios: for rep in bios:
first_letter = rep.name[0][0].upper() first_letter = rep.name[0][0].upper()