Browse Source

Add a kindle notes to org script

Samuel W. Flint 2 years ago
parent
commit
d9ddc7bf46
1 changed files with 72 additions and 0 deletions
  1. 72 0
      kindle-notes-to-org

+ 72 - 0
kindle-notes-to-org

@@ -0,0 +1,72 @@
+#!/usr/bin/env -S nix-shell # -*- python -*-
+#!nix-shell -i python3 -p "python3.withPackages(ps: with ps; [ beautifulsoup4 html5lib tqdm ])"
+# coding: utf-8
+
+import os.path as osp
+import sys
+from argparse import ArgumentParser
+
+from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
+from tqdm import tqdm
+
+import warnings
+warnings.filterwarnings(action='ignore',
+                        category=XMLParsedAsHTMLWarning,
+                        module='bs4')
+
+ap = ArgumentParser(description='Convert Amazon Highlights HTML to an Org file')
+ap.add_argument('-a', '--append',
+                action='store_true',
+                default=False,
+                help='append to output file, do not create (default, create)')
+
+ap.add_argument('-t', '--title',
+                dest='title_format',
+                default='{heading}',
+                help='format string for headers: {title}, {authors} and {heading} are available')
+
+ap.add_argument('-d', '--depth',
+                dest='heading_depth',
+                default=1,
+                type=int,
+                help='how deep are the headings')
+
+ap.add_argument('in_file',
+                type=str,
+                help='input HTML file from Amazon Kindle highlights')
+
+ap.add_argument('out_file',
+                type=str,
+                help='output Org file')
+
+args = ap.parse_args()
+
+heading_stars = '*' * args.heading_depth
+
+if not osp.exists(args.in_file):
+    print(f"File \"{args.in_file}\" does not exist.", file = sys.stderr)
+    sys.exit(1)
+
+with open(args.in_file) as fp:
+    soup = BeautifulSoup(fp, 'html5lib')
+
+title = soup.find("div", "bookTitle").string.strip()
+authors = ' and '.join(list(soup.find("div", "authors").strings)).strip()
+
+note_headings = soup.find_all("div", "noteHeading")
+notes = soup.find_all("div", "noteText")
+
+last_heading = ''
+
+with open(args.out_file, 'a' if args.append else 'w') as fp:
+    for (heading, note) in tqdm(zip(note_headings, notes)):
+        heading_name = ''.join(list(heading.strings)).strip()
+        if heading_name != last_heading:
+            heading_title = args.title_format.format(title=title,
+                                                     authors=authors,
+                                                     heading=heading_name)
+            fp.write(f"\n\n{heading_stars} {heading_title}\n\n")
+        last_heading = heading_name
+        note_text = ''.join(list(note.strings)).strip()
+        fp.write(note_text)
+    fp.write('\n')