git functionality
This commit is contained in:
parent
19cdfb486d
commit
aaa372fe21
1 changed file with 7 additions and 6 deletions
13
crawler.py
13
crawler.py
|
|
@@ -4,8 +4,9 @@ import json
|
|||
from bs4 import BeautifulSoup
|
||||
from time import sleep
|
||||
from os.path import commonprefix
|
||||
from os import makedirs
|
||||
from git import Repo
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
BUNDESTAG_URL = "https://www.bundestag.de/ajax/filterlist/de/abgeordnete/biografien/1040594-1040594?limit=9999&view=BTBiographyList"
|
||||
BUNDESTAG_BASE_URL = "https://www.bundestag.de"
|
||||
|
|
@@ -72,11 +73,7 @@ def main():
|
|||
args = parser.parse_args()
|
||||
if not args.out:
|
||||
raise ValueError("must supply out directory")
|
||||
try:
|
||||
makedirs(args.out)
|
||||
except FileExistsError:
|
||||
print("Path already exists")
|
||||
pass
|
||||
repo = Repo(args.out)
|
||||
links, names = get_links_and_names()
|
||||
if args.debug:
|
||||
links = links[:5]
|
||||
|
|
@@ -84,6 +81,10 @@ def main():
|
|||
bios = [get_bio(link, name) for link, name in zip(links, names)]
|
||||
|
||||
save_info(bios, args.out)
|
||||
repo.git.add("*")
|
||||
repo.index.commit(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
|
||||
origin = repo.remote(name="origin")
|
||||
origin.push()
|
||||
|
||||
|
||||
def save_info(bios, out):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue