diff options
Diffstat (limited to 'Astro 2021/mkbinder.py')
-rw-r--r-- | Astro 2021/mkbinder.py | 78 |
1 files changed, 78 insertions, 0 deletions
#!.venv/bin/python3
"""Render the web pages listed under ``Links/`` to PDF files.

Each ``Links/<topic>.txt`` file holds one URL per line; blank lines and lines
starting with ``#`` are skipped.  Every URL is rendered to
``<topic>/<sanitised url>.pdf``.  The edit date detected for each page is
cached in ``Links/<topic>.pickle`` so that later runs only re-download pages
whose detected date is newer than the cached one (``--force`` overrides this).
"""
import argparse
import os
import pickle
import re
from datetime import date

LINKS_DIR = "Links"

# Used when no edit date can be detected.  It compares newer than any cached
# date, so such pages are downloaded on every run, and it is never cached.
UNKNOWN_DATE = date.fromisoformat("9999-01-01")

# Used for URLs that have no cache entry yet, so that they are always fetched.
NEVER_DOWNLOADED = date.fromisoformat("1970-01-01")

# Everything except word characters, '-' and '.' is removed from file names.
_UNSAFE_CHARS = re.compile(r'(?u)[^-\w.]')


def parse_args(argv=None):
    """Parse the command line (``argv`` defaults to ``sys.argv[1:]``).

    Returns a namespace with ``backend`` ('pdfkit' or 'weasyprint') and
    ``force`` (bool).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--backend', '-b', dest='backend',
        help='change the download backend; default: pdfkit',
        default='pdfkit', choices=['pdfkit', 'weasyprint'])
    # A plain flag: the previous ``choices=[False, True]`` compared the raw
    # command-line *string* against booleans, so every use was rejected.
    parser.add_argument(
        '--force', '-f', dest='force', action='store_true',
        help='force download all links instead of only the ones that need '
             'to be updated; default: False')
    return parser.parse_args(argv)


def read_links(path):
    """Return the URLs in ``path``, skipping blank lines and ``#`` comments."""
    with open(path, "r") as handle:
        # Strip once here so that the cache key, the output file name and the
        # URL handed to the backend all agree, even when the last line has no
        # trailing newline.
        stripped = (line.strip() for line in handle)
        return [url for url in stripped if url and not url.startswith("#")]


def pdf_path(out_dir, url):
    """Return the output path of the PDF for ``url`` inside ``out_dir``.

    The first five characters of the URL are dropped (presumably the
    ``https`` of the scheme) and all characters other than word characters,
    ``-`` and ``.`` are removed from the remainder.
    """
    return os.path.join(out_dir, _UNSAFE_CHARS.sub('', url[5:]) + ".pdf")


def load_dates(path):
    """Load the ``{url: date}`` cache from ``path``; ``{}`` if unusable."""
    try:
        with open(path, 'rb') as handle:
            # NOTE(security): pickle must only be used on trusted data.  This
            # is acceptable only because the cache is written by this script.
            dates = pickle.load(handle)
    except FileNotFoundError:
        return {}
    except Exception as err:
        # Unpickling a damaged file can raise many exception types; treat the
        # cache as empty (best effort) but say so instead of hiding it.
        print("Ignoring unreadable date cache " + path + ": " + str(err))
        return {}
    return dates if isinstance(dates, dict) else {}


def save_dates(path, dates):
    """Write the ``{url: date}`` cache to ``path``."""
    with open(path, 'wb') as handle:
        pickle.dump(dates, handle)


def detect_edit_date(url):
    """Return the edit date htmldate finds for ``url``, else UNKNOWN_DATE."""
    from htmldate import find_date  # third-party; only needed when fetching

    found = find_date(url)
    return UNKNOWN_DATE if found is None else date.fromisoformat(found)


def download_pdf(url, target, backend):
    """Render ``url`` to the file ``target`` using the chosen backend.

    The backends are imported lazily so that only the selected one has to be
    installed and importable.
    """
    if backend == 'pdfkit':
        import pdfkit

        pdfkit.from_url(url, target)
    else:
        import weasyprint

        pdf = weasyprint.HTML(url).write_pdf()
        with open(target, 'wb') as handle:
            handle.write(pdf)


def update_binder(filename, backend, force):
    """Download the new or changed pages listed in ``Links/<filename>``."""
    topic = filename[:-4]  # drop the ".txt" suffix
    print("Examining: " + filename)

    os.makedirs(topic, exist_ok=True)

    cache_path = os.path.join(LINKS_DIR, topic + ".pickle")
    dates = load_dates(cache_path)

    for url in read_links(os.path.join(LINKS_DIR, filename)):
        new_date = detect_edit_date(url)
        old_date = dates.get(url, NEVER_DOWNLOADED)
        if not (force or new_date > old_date):
            continue

        print("Downloading: " + url)
        print("Edit date: " + str(new_date))
        try:
            download_pdf(url, pdf_path(topic, url), backend)
        except Exception as err:
            # Keep going with the remaining links, but leave the cache entry
            # untouched so that the failed page is retried on the next run.
            print("Error when printing: " + str(err))
            continue

        if new_date != UNKNOWN_DATE:
            dates[url] = new_date

    save_dates(cache_path, dates)


def main(argv=None):
    """Process every link list in ``Links/`` except ``requirements.txt``."""
    args = parse_args(argv)
    for filename in os.listdir(LINKS_DIR):
        if not filename.endswith(".txt") or filename == "requirements.txt":
            continue
        update_binder(filename, args.backend, args.force)


if __name__ == "__main__":
    main()