fix disclosure handling

This commit is contained in:
Marco Lents 2025-11-15 15:30:07 +01:00
parent 93310a8030
commit 3f0130c75b

View file

@ -88,8 +88,7 @@ Biographie: {self.cv}
# Mandat
{self.mandate[0]}, {self.mandate[1]}
# Veröffentlichungspflichtige Angaben
{self.disclosures.replace(DISCLOSURE_DISLAIMER, "")}
# Veröffentlichungspflichtige Angaben {funcs_to_str(self.disclosures)}
"""
return txt
@ -125,6 +124,7 @@ def main():
)
parser.add_argument("-o", "--out")
parser.add_argument("--debug", action="store_true")
parser.add_argument("--no-git", action="store_true")
args = parser.parse_args()
if not args.out:
raise ValueError("must supply out directory")
@ -142,6 +142,9 @@ def main():
save_raw(bios, args.out)
save_individuals(bios, args.out)
if args.no_git:
return
repo.git.add("*")
repo.index.commit(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
origin = repo.remote(name="origin")
@ -209,7 +212,7 @@ def get_bio(url, name, sleep_for):
soup.find(class_="m-biography__subHeading --mandate").text,
soup.find(string=re.compile(r"^Wahlkreis \d*:")),
)
disclosures = soup.find(class_="m-biography__infoDisclaimer").text.strip()
disclosures = get_disclosures(soup.find(class_="m-biography__infos"))
bio = Biography(
name,
@ -228,6 +231,30 @@ def get_bio(url, name, sleep_for):
return bio
def get_disclosures(elem):
    """Group the text found inside the ``div`` elements of *elem* by heading.

    Each matched ``div`` is scanned child by child. An ``h3`` child starts a
    new group; every other tag child with non-blank text is added (stripped)
    to the body of the current group. Children without a tag name (plain
    text nodes) are ignored.

    Args:
        elem: Parsed container element (as returned by ``soup.find``), or a
            falsy value when the container was not found.

    Returns:
        ``None`` if *elem* is falsy, otherwise a list of
        ``(heading, body)`` tuples where ``heading`` is a string (``""`` for
        content that precedes the first ``h3`` of a div) and ``body`` is a
        list of stripped strings.
    """
    if not elem:
        return None

    # The predicate is meant to leave out the disclaimer div.
    # NOTE(review): how the predicate is applied to multi-class divs is up
    # to BeautifulSoup -- confirm against the real markup.
    divs = elem.find_all(
        "div",
        class_=lambda cls: cls != "m-biography__infoDisclaimer",
    )

    out = []
    for div in divs:
        current_heading = ""
        current_body = []
        for child in div.children:
            if child.name == "h3":
                # A new heading closes the previous group; a group that
                # collected no body lines is dropped here.
                if current_body:
                    out.append((current_heading, current_body))
                current_heading = child.text.strip()
                current_body = []
                continue
            if not child.name:
                # No tag name: a bare text node between tags.
                continue
            text = child.text.strip()
            if text:
                current_body.append(text)
        # Flush the last group of this div. A div that produced neither a
        # heading nor any body carries no information, so it must not add
        # an empty ("", []) entry to the result.
        if current_heading or current_body:
            out.append((current_heading, current_body))
    return out
def get_functions(elem):
out = []
current_heading = None