generate descriptive commit messages by diffing against previous run
Loads the previous raw.json before saving, compares against current crawl, and generates commit messages listing: new/departed representatives, party changes, new disclosures, and total profiles updated. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e5a43977c1
commit
fb38bb5894
1 changed files with 73 additions and 1 deletions
74
crawler.py
74
crawler.py
|
|
@ -144,6 +144,8 @@ def main():
|
||||||
|
|
||||||
bios = [get_bio(link, name, sleep_for) for link, name in zip(links, names)]
|
bios = [get_bio(link, name, sleep_for) for link, name in zip(links, names)]
|
||||||
|
|
||||||
|
old_bios = load_old_bios(args.out)
|
||||||
|
|
||||||
if not args.debug:
|
if not args.debug:
|
||||||
save_raw(bios, args.out)
|
save_raw(bios, args.out)
|
||||||
save_individuals(bios, args.out)
|
save_individuals(bios, args.out)
|
||||||
|
|
@ -156,12 +158,82 @@ def main():
|
||||||
if repo.git.diff(name_only=True) == "":
|
if repo.git.diff(name_only=True) == "":
|
||||||
return
|
return
|
||||||
|
|
||||||
|
message = generate_commit_message(old_bios, bios)
|
||||||
repo.git.add("*")
|
repo.git.add("*")
|
||||||
repo.index.commit(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
|
repo.index.commit(message)
|
||||||
origin = repo.remote(name="origin")
|
origin = repo.remote(name="origin")
|
||||||
origin.push()
|
origin.push()
|
||||||
|
|
||||||
|
|
||||||
|
def load_old_bios(out):
|
||||||
|
try:
|
||||||
|
with open(f"{out}/raw.json", "r", encoding="utf-8") as f:
|
||||||
|
return json.load(f)
|
||||||
|
except FileNotFoundError:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def generate_commit_message(old_bios, new_bios):
|
||||||
|
old_names = {tuple(b["name"]): b for b in old_bios}
|
||||||
|
new_names = {tuple(b.name): b for b in new_bios}
|
||||||
|
|
||||||
|
added = [new_names[n] for n in new_names if n not in old_names]
|
||||||
|
removed = [old_names[n] for n in old_names if n not in new_names]
|
||||||
|
|
||||||
|
new_disclosures = []
|
||||||
|
for bio in new_bios:
|
||||||
|
key = tuple(bio.name)
|
||||||
|
if key not in old_names:
|
||||||
|
continue
|
||||||
|
old_discl = old_names[key].get("disclosures") or []
|
||||||
|
new_discl = bio.disclosures or []
|
||||||
|
old_items = {item for d in old_discl for item in d[1]}
|
||||||
|
new_items = {item for d in new_discl for item in d[1]}
|
||||||
|
for item in new_items - old_items:
|
||||||
|
new_disclosures.append((bio, item))
|
||||||
|
|
||||||
|
party_changes = []
|
||||||
|
for bio in new_bios:
|
||||||
|
key = tuple(bio.name)
|
||||||
|
if key not in old_names:
|
||||||
|
continue
|
||||||
|
old_party = old_names[key].get("party", "")
|
||||||
|
if old_party != bio.party:
|
||||||
|
party_changes.append((bio, old_party))
|
||||||
|
|
||||||
|
date = datetime.now().strftime("%Y-%m-%d")
|
||||||
|
sections = []
|
||||||
|
|
||||||
|
if added:
|
||||||
|
sections.append("Neue Abgeordnete:\n" + "\n".join(
|
||||||
|
f"- {b.name[1]} {b.name[0]} ({b.party})" for b in added
|
||||||
|
))
|
||||||
|
|
||||||
|
if removed:
|
||||||
|
sections.append("Ausgeschieden:\n" + "\n".join(
|
||||||
|
f"- {b['name'][1]} {b['name'][0]} ({b.get('party', '')})" for b in removed
|
||||||
|
))
|
||||||
|
|
||||||
|
if party_changes:
|
||||||
|
sections.append("Parteiwechsel:\n" + "\n".join(
|
||||||
|
f"- {b.name[1]} {b.name[0]}: {old} -> {b.party}"
|
||||||
|
for b, old in party_changes
|
||||||
|
))
|
||||||
|
|
||||||
|
if new_disclosures:
|
||||||
|
sections.append("Neue Veröffentlichungen:\n" + "\n".join(
|
||||||
|
f"- {b.name[1]} {b.name[0]} ({b.party}): {item}"
|
||||||
|
for b, item in new_disclosures
|
||||||
|
))
|
||||||
|
|
||||||
|
updated = len(new_bios)
|
||||||
|
sections.append(f"{updated} Profile aktualisiert")
|
||||||
|
|
||||||
|
title = f"Aktualisierung {date}"
|
||||||
|
body = "\n\n".join(sections)
|
||||||
|
return f"{title}\n\n{body}"
|
||||||
|
|
||||||
|
|
||||||
def save_individuals(bios, out):
|
def save_individuals(bios, out):
|
||||||
for rep in bios:
|
for rep in bios:
|
||||||
first_letter = rep.name[0][0].upper()
|
first_letter = rep.name[0][0].upper()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue