about | summary | refs | log | tree | commit | diff
path: root/mkbinder.py
diff options
context:
space:
mode:
author: Anthony Wang, 2020-12-13 21:23:26 -0600
committer: Anthony Wang, 2020-12-13 21:23:26 -0600
commit: f513be7de6a3d69b88d092a3f0bf9e6bc74eac1c (patch)
tree: ee7d272dae0de1a06f28e1a0959b466034b9b12f /mkbinder.py
parent: 68e7e630a4e607be8cc2c0e5abcc9add9553ce7a (diff)
mkbinder is ready!
Diffstat (limited to 'mkbinder.py')
-rwxr-xr-x mkbinder.py 58
1 files changed, 45 insertions, 13 deletions
diff --git a/mkbinder.py b/mkbinder.py
index 5e39eba..282c8cd 100755
--- a/mkbinder.py
+++ b/mkbinder.py
@@ -4,6 +4,9 @@ import weasyprint
import re
import os
import argparse
+import pickle
+from htmldate import find_date
+from datetime import date
# CLI arguments
@@ -22,22 +25,51 @@ for filename in os.listdir("."):
os.mkdir(filename[:-4])
except:
pass
+
+ try:
+ dates = pickle.load(open(filename[:-4] + ".pickle", 'rb'))
+ except:
+ dates = {}
file = open(filename, "r")
links = file.readlines()
for link in links:
- print("Downloading: " + link)
- name = os.path.join(filename[:-4], re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
- # name = re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf"
- # print(name)
+ new_date_str = find_date(link[:-1])
+
+ if new_date_str == None:
+ new_date = date.fromisoformat("9999-01-01")
+ else:
+ new_date = date.fromisoformat(new_date_str)
+
try:
- # weasyprint seems faster?
- if args.backend == 'pdfkit':
- pdfkit.from_url(link, name)
- else:
- pdf = weasyprint.HTML(link).write_pdf()
- open(name, 'wb').write(pdf)
+ old_date = dates[link[:-1]]
except:
- # Ignore exceptions
- # Probably not a good idea
- pass
+ old_date = date.fromisoformat("1970-01-01")
+
+
+ if new_date > old_date:
+ print("Downloading: " + link[:-1])
+ print("Edit date: " + str(new_date))
+
+ name = os.path.join(filename[:-4], re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf")
+ # name = re.sub(r'(?u)[^-\w.]', '', link[5:]) + ".pdf"
+ # print(name)
+ try:
+ # weasyprint seems faster?
+ if args.backend == 'pdfkit':
+ pdfkit.from_url(link, name)
+ else:
+ pdf = weasyprint.HTML(link).write_pdf()
+ open(name, 'wb').write(pdf)
+ except:
+ # Ignore exceptions
+ # Probably not a good idea
+ pass
+
+
+ if new_date != date.fromisoformat("9999-01-01"):
+ dates[link[:-1]] = new_date
+
+
+ pickle.dump(dates, open(filename[:-4] + ".pickle", 'wb'))
+