#!/usr/bin/env -S nix-shell # -*- python -*-
#!nix-shell -i python3 -p "python3.withPackages(ps: with ps; [ beautifulsoup4 html5lib tqdm ])"
# coding: utf-8

import os.path as osp
import sys
from argparse import ArgumentParser

from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
from tqdm import tqdm

import warnings
warnings.filterwarnings(action='ignore',
                        category=XMLParsedAsHTMLWarning,
                        module='bs4')

# Command-line interface.  Two positionals (input HTML, output Org file) plus
# options controlling append mode, the heading text template and the number
# of leading stars on each Org heading.
ap = ArgumentParser(
    description='Convert Amazon Highlights HTML to an Org file',
)

# Flag: when given, the output file is opened for appending instead of being
# recreated.  ``store_true`` already defaults to False.
ap.add_argument(
    '-a', '--append',
    action='store_true',
    help='append to output file, do not create (default, create)',
)

# Template expanded with str.format() for every heading that is written.
ap.add_argument(
    '-t', '--title',
    default='{heading}',
    dest='title_format',
    help='format string for headers: {title}, {authors} and {heading} are available',
)

# Number of '*' characters placed in front of each Org heading.
ap.add_argument(
    '-d', '--depth',
    type=int,
    default=1,
    dest='heading_depth',
    help='how deep are the headings',
)

# Positional arguments are kept as plain strings (argparse's default type).
ap.add_argument(
    'in_file',
    help='input HTML file from Amazon Kindle highlights',
)
ap.add_argument(
    'out_file',
    help='output Org file',
)

args = ap.parse_args()

# Conversion logic: parse the highlights export and write the Org file.


def node_text(node, joiner=''):
    """Return the stripped text content of a parsed HTML node.

    All string fragments found in ``node.strings`` are concatenated with
    *joiner* and the result is stripped.  A missing node (``None``) yields an
    empty string instead of raising, so an export without the expected
    ``<div>`` does not abort the conversion.
    """
    if node is None:
        return ''
    return joiner.join(node.strings).strip()


def org_chunks(entries, heading_stars, title_format, title, authors):
    """Yield the pieces of Org text for a sequence of highlights.

    *entries* is an iterable of ``(heading_name, note_text)`` string pairs.
    A heading line is emitted only when the heading differs from the one of
    the previous entry; consecutive notes that share a heading are separated
    by a blank line so they do not run together.  A final newline is always
    yielded last.

    *title_format* is expanded with ``str.format`` and may reference
    ``{title}``, ``{authors}`` and ``{heading}``.
    """
    last_heading = ''
    for heading_name, note_text in entries:
        if heading_name != last_heading:
            heading_title = title_format.format(title=title,
                                                authors=authors,
                                                heading=heading_name)
            yield f"\n\n{heading_stars} {heading_title}\n\n"
            last_heading = heading_name
        else:
            # Same heading as the previous note: start a new paragraph
            # rather than gluing the two notes together.
            yield "\n\n"
        yield note_text
    yield '\n'


def convert_highlights(in_file, out_file, append=False,
                       title_format='{heading}', heading_depth=1):
    """Convert the Kindle highlights export *in_file* into Org file *out_file*.

    The output file is appended to when *append* is true and overwritten
    otherwise.  Every heading is prefixed with *heading_depth* stars.
    Prints an error to stderr and exits with status 1 when *in_file* does
    not exist.
    """
    # Try to open instead of testing for existence first: this avoids the
    # check-then-use race while keeping the original message and exit code.
    try:
        in_fp = open(in_file, 'rb')
    except FileNotFoundError:
        print(f"File \"{in_file}\" does not exist.", file=sys.stderr)
        sys.exit(1)

    # The file is read as bytes so that BeautifulSoup picks the encoding
    # declared by the document rather than the locale's default.
    with in_fp:
        soup = BeautifulSoup(in_fp, 'html5lib')

    title = node_text(soup.find("div", "bookTitle"))
    authors = node_text(soup.find("div", "authors"), ' and ')

    note_headings = soup.find_all("div", "noteHeading")
    notes = soup.find_all("div", "noteText")

    if len(note_headings) != len(notes):
        # zip() below stops at the shorter list; make the truncation visible.
        print(f"Warning: found {len(note_headings)} headings but "
              f"{len(notes)} notes; unmatched entries are skipped.",
              file=sys.stderr)

    heading_stars = '*' * heading_depth

    # An explicit total lets tqdm draw a real progress bar.
    pairs = tqdm(zip(note_headings, notes),
                 total=min(len(note_headings), len(notes)))
    entries = ((node_text(heading), node_text(note))
               for heading, note in pairs)

    with open(out_file, 'a' if append else 'w', encoding='utf-8') as out_fp:
        out_fp.writelines(org_chunks(entries, heading_stars, title_format,
                                     title, authors))


if __name__ == '__main__':
    convert_highlights(args.in_file, args.out_file,
                       append=args.append,
                       title_format=args.title_format,
                       heading_depth=args.heading_depth)