aboutsummaryrefslogtreecommitdiff
path: root/Astro 2021/mkbinder.py
diff options
context:
space:
mode:
Diffstat (limited to 'Astro 2021/mkbinder.py')
-rw-r--r--Astro 2021/mkbinder.py78
1 files changed, 78 insertions, 0 deletions
diff --git a/Astro 2021/mkbinder.py b/Astro 2021/mkbinder.py
new file mode 100644
index 0000000..82772a0
--- /dev/null
+++ b/Astro 2021/mkbinder.py
@@ -0,0 +1,78 @@
+#!.venv/bin/python3
+import pdfkit
+import weasyprint
+import re
+import os
+import argparse
+import pickle
+from htmldate import find_date
+from datetime import date
+
+
+# CLI arguments: --backend selects the PDF renderer, --force is an on/off flag (takes no value) that re-downloads every link.
+parser = argparse.ArgumentParser()
+parser.add_argument('--backend', '-b', dest = 'backend', help = 'change the download backend; default: pdfkit', default = 'pdfkit', choices = ['pdfkit', 'weasyprint'])
+parser.add_argument('--force', '-f', dest = 'force', action = 'store_true', help = 'force download all links instead of only the ones that need to be updated; default: False')
+args = parser.parse_args()
+
+
+# For each Links/<name>.txt: render every listed URL to <name>/<sanitized link>.pdf when the page's detected
+# date is newer than the one cached in Links/<name>.pickle, or unconditionally when --force is given.
+UNKNOWN_DATE = date.fromisoformat("9999-01-01")  # no date detected: newer than any cached date, never cached
+NEVER_SEEN = date.fromisoformat("1970-01-01")  # stand-in for links that have no cached date yet
+
+for filename in os.listdir("Links"):
+    if not filename.endswith(".txt") or filename == "requirements.txt":
+        continue
+
+    print("Examining: " + filename)
+    topic = filename[:-4]  # file name without ".txt"; doubles as the output folder name
+    cache_path = os.path.join("Links", topic + ".pickle")
+    try:
+        os.mkdir(topic)
+    except FileExistsError:  # folder left over from an earlier run
+        pass
+
+    try:
+        # pickle can run code while loading: only load caches written by this script.
+        with open(cache_path, 'rb') as cache_file:
+            dates = pickle.load(cache_file)
+    except Exception:  # missing or unreadable cache: treat every link as never downloaded
+        dates = {}
+
+    with open(os.path.join("Links", filename), "r") as link_file:
+        # strip() rather than cutting the last character, so a final line without a newline stays whole
+        links = [line.strip() for line in link_file]
+
+    for link in links:
+        if not link or link.startswith("#"):  # blank line or comment
+            continue
+
+        new_date_str = find_date(link)
+        new_date = UNKNOWN_DATE if new_date_str is None else date.fromisoformat(new_date_str)
+        old_date = dates.get(link, NEVER_SEEN)
+
+        if new_date > old_date or args.force:
+            print("Downloading: " + link)
+            print("Edit date: " + str(new_date))
+            # File name: the link minus its first five characters, keeping only word characters, '-' and '.'
+            name = os.path.join(topic, re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
+            try:
+                # weasyprint seems faster, but seems to be broken sometimes
+                if args.backend == 'pdfkit':
+                    pdfkit.from_url(link, name)
+                else:
+                    pdf = weasyprint.HTML(link).write_pdf()
+                    with open(name, 'wb') as pdf_file:
+                        pdf_file.write(pdf)
+            except Exception as err:  # one broken page must not abort the whole run
+                print("Error when printing")
+                print(err)
+                continue  # keep the old cached date so this link is retried on the next run
+
+        if new_date != UNKNOWN_DATE:
+            dates[link] = new_date
+
+    with open(cache_path, 'wb') as cache_file:
+        pickle.dump(dates, cache_file)
+