diff --git a/crawler.py b/crawler.py
index 69fa8fe..fa760b6 100644
--- a/crawler.py
+++ b/crawler.py
@@ -151,6 +151,9 @@ def main():
     save_individuals(bios, args.out)
     save_votes(bios, args.out)
     save_disclosures(bios, args.out)
+    save_readme(bios, args.out)
+    save_party_index(bios, args.out)
+    save_letter_indexes(bios, args.out)

     if args.no_git:
         return
@@ -315,6 +318,119 @@ def save_votes(bios, out):
         f.write(md)


+def _party_file_stem(party):
+    """Return the file name stem used for a party's index page.
+
+    Party names can contain "/" (read by open() as a path separator) or
+    spaces (which break plain Markdown link targets), so both are replaced.
+    save_readme and save_party_index both call this, so the README links
+    always match the files that are actually written.
+    """
+    return str(party).replace("/", "-").replace(" ", "_")
+
+
+def _profile_file_stem(bio):
+    """Return the file name stem that links to a member's profile page use.
+
+    NOTE(review): this has to match the naming used when the profile pages
+    are written (presumably in save_individuals) -- confirm.
+    """
+    return f"{bio.name[0]} {bio.name[1]}".replace(" ", "_")
+
+
+def save_readme(bios, out):
+    """Write ``{out}/README.md`` with totals, a party table and directory links.
+
+    Args:
+        bios: Sized collection of bio objects; only ``bio.party`` is read.
+        out: Output directory the README is written into.
+    """
+    date = datetime.now().strftime("%Y-%m-%d %H:%M")
+    parties = {}
+    for bio in bios:
+        parties[bio.party] = parties.get(bio.party, 0) + 1
+
+    lines = [
+        "# Bundestag",
+        "",
+        "Automatisch erfasste Daten der Abgeordneten des Deutschen Bundestages.",
+        "",
+        # Two trailing spaces are a Markdown hard line break; they keep the
+        # total and the timestamp on separate rendered lines.
+        f"**Abgeordnete:** {len(bios)}  ",
+        f"**Letzte Aktualisierung:** {date}",
+        "",
+        "## Parteien",
+        "",
+        "| Partei | Abgeordnete |",
+        "|--------|------------:|",
+    ]
+    # Sort on str() so a bio without a party (None) cannot raise TypeError.
+    for party, count in sorted(parties.items(), key=lambda item: str(item[0])):
+        link = f"Parteien/{_party_file_stem(party)}.md"
+        lines.append(f"| [{party}]({link}) | {count} |")
+    lines += [
+        "",
+        "## Verzeichnisse",
+        "",
+        "- [Abgeordnete](Abgeordnete/) — Einzelprofile nach Nachname",
+        "- [Abstimmungen](Abstimmungen/) — Abstimmungen nach Thema",
+        "- [Veröffentlichungspflichtige Angaben](Voep_Angaben/) — Nebentätigkeiten",
+        "- [Parteien](Parteien/) — Abgeordnete nach Partei",
+    ]
+
+    with open(f"{out}/README.md", "w", encoding="utf-8") as f:
+        f.write("\n".join(lines) + "\n")
+
+
+def save_party_index(bios, out):
+    """Write one ``{out}/Parteien/{party}.md`` page per party, listing its members.
+
+    Args:
+        bios: Bio objects, grouped via group_by_party; ``name`` and ``job`` are read.
+        out: Output directory; the "Parteien" subdirectory is created if missing.
+    """
+    party_dir = f"{out}/Parteien"
+    makedirs(party_dir, exist_ok=True)
+    for party, bio_list in group_by_party(bios):
+        lines = [f"# {party}", "", f"{len(bio_list)} Abgeordnete", ""]
+        for bio in sorted(bio_list, key=lambda b: b.name):
+            first_letter = bio.name[0][0].upper()
+            target = f"../Abgeordnete/{first_letter}/{_profile_file_stem(bio)}.md"
+            entry = f"- [{bio.name[1]} {bio.name[0]}]({target})"
+            if bio.job:
+                entry += f" — {bio.job}"
+            lines.append(entry)
+        page = f"{party_dir}/{_party_file_stem(party)}.md"
+        with open(page, "w", encoding="utf-8") as f:
+            f.write("\n".join(lines) + "\n")
+
+
+def save_letter_indexes(bios, out):
+    """Write ``{out}/Abgeordnete/{letter}/index.md`` for each initial letter in use.
+
+    Members are grouped by the upper-cased first character of ``name[0]``.
+
+    Args:
+        bios: Iterable of bio objects; ``name`` and ``party`` are read here.
+        out: Output directory.
+    """
+    by_letter = {}
+    for bio in bios:
+        by_letter.setdefault(bio.name[0][0].upper(), []).append(bio)
+    for letter, bio_list in sorted(by_letter.items()):
+        letter_dir = f"{out}/Abgeordnete/{letter}"
+        # Create the directory here so this step does not depend on an
+        # earlier step having already written profiles for this letter.
+        makedirs(letter_dir, exist_ok=True)
+        lines = [f"# {letter}", ""]
+        for bio in sorted(bio_list, key=lambda b: b.name):
+            stem = _profile_file_stem(bio)
+            lines.append(f"- [{bio.name[1]} {bio.name[0]}]({stem}.md) ({bio.party})")
+        with open(f"{letter_dir}/index.md", "w", encoding="utf-8") as f:
+            f.write("\n".join(lines) + "\n")
+
+
 def group_by_party(bios):
     grouped = {}
     for bio in bios: