diff options
author | Anthony Wang | 2020-12-13 21:23:26 -0600 |
---|---|---|
committer | Anthony Wang | 2020-12-13 21:23:26 -0600 |
commit | f513be7de6a3d69b88d092a3f0bf9e6bc74eac1c (patch) | |
tree | ee7d272dae0de1a06f28e1a0959b466034b9b12f /mkbinder.py | |
parent | 68e7e630a4e607be8cc2c0e5abcc9add9553ce7a (diff) |
mkbinder is ready!
Diffstat (limited to 'mkbinder.py')
-rwxr-xr-x | mkbinder.py | 58 |
1 file changed, 45 insertions, 13 deletions
```diff
diff --git a/mkbinder.py b/mkbinder.py
index 5e39eba..282c8cd 100755
--- a/mkbinder.py
+++ b/mkbinder.py
@@ -4,6 +4,9 @@ import weasyprint
 import re
 import os
 import argparse
+import pickle
+from htmldate import find_date
+from datetime import date
 
 
 # CLI arguments
@@ -22,22 +25,51 @@ for filename in os.listdir("."):
             os.mkdir(filename[:-4])
         except:
             pass
+
+        try:
+            dates = pickle.load(open(filename[:-4] + ".pickle", 'rb'))
+        except:
+            dates = {}
 
         file = open(filename, "r")
         links = file.readlines()
         for link in links:
-            print("Downloading: " + link)
-            name = os.path.join(filename[:-4], re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
-            # name = re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf"
-            # print(name)
+            new_date_str = find_date(link[:-1])
+
+            if new_date_str == None:
+                new_date = date.fromisoformat("9999-01-01")
+            else:
+                new_date = date.fromisoformat(new_date_str)
+
             try:
-                # weasyprint seems faster?
-                if args.backend == 'pdfkit':
-                    pdfkit.from_url(link, name)
-                else:
-                    pdf = weasyprint.HTML(link).write_pdf()
-                    open(name, 'wb').write(pdf)
+                old_date = dates[link[:-1]]
             except:
-                # Ignore exceptions
-                # Probably not a good idea
-                pass
+                old_date = date.fromisoformat("1970-01-01")
+
+
+            if new_date > old_date:
+                print("Downloading: " + link[:-1])
+                print("Edit date: " + str(new_date))
+
+                name = os.path.join(filename[:-4], re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
+                # name = re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf"
+                # print(name)
+                try:
+                    # weasyprint seems faster?
+                    if args.backend == 'pdfkit':
+                        pdfkit.from_url(link, name)
+                    else:
+                        pdf = weasyprint.HTML(link).write_pdf()
+                        open(name, 'wb').write(pdf)
+                except:
+                    # Ignore exceptions
+                    # Probably not a good idea
+                    pass
+
+
+            if new_date != date.fromisoformat("9999-01-01"):
+                dates[link[:-1]] = new_date
+
+
+        pickle.dump(dates, open(filename[:-4] + ".pickle", 'wb'))
+
```