Skip to content

Instantly share code, notes, and snippets.

@jeeger
Last active November 26, 2022 15:37
Show Gist options
  • Save jeeger/d13159fefaee33c771be979639900ebc to your computer and use it in GitHub Desktop.
Save jeeger/d13159fefaee33c771be979639900ebc to your computer and use it in GitHub Desktop.
Sum up page counts of books from Calibre library with babashka
#!/usr/bin/env bb
(ns bookstats
(:require [babashka.pods :as pods]
[clojure.pprint :as pprint]))
(pods/load-pod 'org.babashka/go-sqlite3 "0.1.0")
(pods/load-pod 'retrogradeorbit/bootleg "0.1.9")
(require '[pod.babashka.go-sqlite3 :as sqlite]
'[pod.retrogradeorbit.hickory.select :as s]
'[pod.retrogradeorbit.bootleg.utils :as utils])
(import [java.net URLEncoder]
[java.time.temporal ChronoUnit]
[java.time Instant]
[java.text SimpleDateFormat])
(defn date-after
  "Builds a row predicate: the returned function is true when the row's
  :timestamp compares strictly greater than `arg` via `.compareTo`.

  NOTE(review): assumes :timestamp and `arg` are mutually comparable
  (e.g. both java.util.Date) -- confirm what the sqlite pod returns."
  [arg]
  (fn [{:keys [timestamp]}]
    (pos? (.compareTo timestamp arg))))
(defn books-since
  "Queries `database` for every book (title, comma-joined author names and
  timestamp, grouped per book id) and lazily keeps the rows accepted by
  `(date-after since)`. The date filter is applied here, not in the SQL."
  [database since]
  (let [added-after? (date-after since)
        all-books-sql "select b.title, group_concat(a.name, ', ') as authors, b.timestamp from books b join books_authors_link bal on b.id = bal.book join authors a on bal.author = a.id group by b.id"]
    (->> (sqlite/query database all-books-sql)
         (filter added-after?))))
(defn book-urls
  "Searches Goodreads for `title` followed by `authors` and returns the
  absolute URLs of the result links (anchors with class \"bookTitle\"), in
  the order they appear on the search page.

  If the search fails (network error, HTTP error status, unparsable page)
  the error is printed and an empty sequence is returned, so one bad lookup
  does not abort the whole report."
  [title authors]
  (let [query      (URLEncoder/encode (str title " " authors) "utf-8")
        search-url (str "https://www.goodreads.com/search?q=" query "&search_type=books")]
    (try
      (as-> (slurp search-url) val
        (utils/convert-to val :hickory-seq)
        (mapcat #(s/select (s/and (s/tag "a") (s/class "bookTitle")) %) val)
        (map #(get-in % [:attrs :href]) val)
        (map #(str "https://goodreads.com" %) val)
        ;; Realize the lazy pipeline inside the try so that any error raised
        ;; while selecting is caught here rather than at the caller.
        (doall val))
      (catch Exception e
        (println (str "Error searching for book URLs at " search-url ": " e))
        ()))))
(defn extract-page-count
  "Fetches `url`, finds the first <span> whose itemprop attribute equals
  \"numberOfPages\", and parses the leading number of its \"<n> pages\" text
  as an int.

  Any exception along the way (fetch failure, missing element, text that
  does not match, unparsable number) is printed and nil is returned."
  [url]
  (try
    (let [html           (slurp url)
          trees          (utils/convert-to html :hickory-seq)
          pages-selector (s/and (s/tag "span")
                                (s/attr "itemprop" #(= % "numberOfPages")))
          pages-span     (first (mapcat #(s/select pages-selector %) trees))
          ;; First child of the span is expected to be its text content.
          label          (first (get pages-span :content))
          [_ digits]     (re-find #"(\d+) pages" label)]
      (Integer/parseInt digits))
    (catch Exception e
      (println (str "Error parsing page count from " url ": " e))
      nil)))
(defn book-with-page-count
  "Takes a book row with :title and :authors and returns a map of :title,
  :authors and :pages. :pages is the first non-nil result of
  `extract-page-count` over the candidate URLs from `book-urls`, or nil when
  none of them yields a page count."
  [{:keys [title authors]}]
  (let [pages (->> (book-urls title authors)
                   (some extract-page-count))]
    {:title   title
     :authors authors
     :pages   pages}))
(defn books-since-report
  "Returns a lazy sequence with one `book-with-page-count` map for each row
  that `books-since` yields for `database` and `since`."
  [database since]
  (->> (books-since database since)
       (map book-with-page-count)))
(defn pretty-print-report
  "Prints a table of the books reported by `books-since-report` for
  `database` and `since` (a java.util.Date), followed by a line with the
  total page count and the average pages per day since `since`.

  Books whose page count could not be determined (:pages nil) still appear
  in the table but are left out of the total."
  [database since]
  (let [table        (books-since-report database since)
        ;; :pages is nil when no page count was found; `keep` drops those
        ;; entries so `+` is never handed nil.
        total        (reduce + (keep :pages table))
        elapsed-days (.between ChronoUnit/DAYS (.toInstant since) (Instant/now))
        ;; Clamp to one day so a `since` of today (or later) cannot cause a
        ;; divide-by-zero or a negative rate.
        perday       (/ total (max 1 elapsed-days))]
    (pprint/print-table table)
    ;; println so the summary line is newline-terminated.
    (println (str "Total pages: " total ",per day: " (float perday)))))
;; Script entry point. Expects two command-line arguments: the path of the
;; database to query and a start date formatted as yyyy-MM-dd.
(let [[database since-arg] *command-line-args*]
  (if (and database since-arg)
    (let [date-format (SimpleDateFormat. "yyyy-MM-dd")]
      (pretty-print-report database (.parse date-format since-arg)))
    ;; Missing arguments would otherwise reach `.parse` as nil and fail with
    ;; an opaque NullPointerException; report the expected usage instead.
    (do
      (binding [*out* *err*]
        (println "Usage: bookstats <database-path> <since: yyyy-MM-dd>"))
      (System/exit 1))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment