Skip to content

Commit a8b2ca8

Browse files
committed
Add offline search with Pagefind.app
Run `bb index` to build the search index & supporting files, then serve `./search.html` + the generated `./pagefind/` to be able to search. Uses the pagefind Node lib to be able to index .edn files as "custom records". Written with Squint, so that I don't need to write JS :-)
1 parent d80b62e commit a8b2ca8

6 files changed

+490
-2
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ nongit/
88
.clj-kondo/
99
.nrepl-port
1010
.portal/
11-
.vscode/
11+
.vscode/
12+
build-pagefind-index.mjs
13+
pagefind/

bb.edn

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
{:tasks {run {:doc "Download posts added after the previous run"
2+
:override-builtin true
23
:task (clojure "-M" "-m" "clj-tumblr-summarizer.main")}
34
summarize {:doc "Create an HTML summary of (already downloaded) posts from the previous month"
45
:task (clojure "-X:summarize")} ; also: clojure "-X:summarize" ":date" '"2022-12"'
56
summarize-m {:doc "Create an HTML summary of (already downloaded) posts from AND month given as CLI arg like 2022-12"
67
:task (when-let [md (first *command-line-args*)]
78
(assert (re-matches (re-pattern "^\\d{4}-\\d{2}$") md) "The month arg must match yyyy-mm")
8-
(clojure (format "-X:summarize :date '\"%s\"'" md)))}}}
9+
(clojure (format "-X:summarize :date '\"%s\"'" md)))}
10+
index {:doc "Build Pagefind.app index for static search of the records"
11+
:task (shell "npx squint run build-pagefind-index.cljs")}}}

build-pagefind-index.cljs

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
;; TODO: Add to the index incrementally, only new stuff => faster; currently takes 3 min
2+
(ns build-pagefind-index
3+
"Build a pagefind search index from .edn Tumblr entries previously downloaded
4+
into ./data, together with the supporting .js, .css and other files.
5+
Serve statically ./search.html and ./pagefind/ to use the search."
6+
{:clj-kondo/ignore true}
7+
(:require
8+
["fs" :as fs]
9+
["node:fs/promises" :refer [readdir readFile]]
10+
["url" :refer [fileURLToPath]]
11+
["pagefind" :as pagefind]
12+
["edn-data" :refer [parseEDNString]]))
13+
14+
;; Convert one parsed Tumblr entry into the record map that is handed to
;; Pagefind's `addCustomRecord`.
;;
;; Reads `tags`, `summary`, `content`, `post_url` and `date` from the entry:
;; - every item of `content` contributes its `text` or, failing that, its
;;   `description`; the pieces are joined with newlines into :content
;; - `date` is cut down to its first 10 characters (presumably the yyyy-mm-dd
;;   part - TODO confirm against the downloaded data)
;; - non-empty `tags` are additionally exposed in :meta as a sorted,
;;   comma-separated string
;;
;; Any error is logged together with the offending entry and then re-thrown.
(defn entry->record [{:keys [tags summary content post_url date] :as entry}]
  (try
    (let [day (when date (.substring date 0 10))
          body (-> content
                   ;; type 'link' -> description, text -> text
                   (.map (fn [{:keys [text description]}]
                           (or text description)))
                   (.join "\n"))]
      {:url post_url
       :content body
       :language "en"
       ;; optional:
       :meta (cond-> {:title summary
                      ;; These below will be displayed below the result
                      :date day #_image}
               (seq tags)
               ;; .sort works in place, so sort a copy and leave the entry's
               ;; own tags array in its original order
               (assoc! :tags (-> tags (.slice) (.sort) (.join ", "))))
       :filters {:tags tags}
       :sort {:date day}})
    (catch :default err
      (println "ERROR processing entry" entry ":" (.-message err))
      (throw err))))
35+
36+
;; Build the Pagefind index from every .edn file in the `data` directory and
;; write it to the `pagefind` output directory.
;;
;; Each file is read as UTF-8, parsed as EDN (maps as JS objects, keywords as
;; strings), turned into a record by `entry->record` and added to the index
;; via `addCustomRecord`. All files are processed concurrently and awaited
;; together with Promise.all, so the first rejection fails the whole run.
(defn ^:async index-all []
  (let [;; Only .edn files are indexed; filter once so that the same list
        ;; drives both the indexing and the count reported at the end.
        edn-files (.filter (js-await (readdir "data"))
                           (fn [file-name] (.endsWith file-name ".edn")))
        index (.-index (js-await (pagefind/createIndex)))]
    ;; Add all entries to the index:
    (js-await
     (Promise/all
      (.map edn-files
            (fn [file-name]
              (-> (readFile (str "data/" file-name) {:encoding "utf8"})
                  (.then #(parseEDNString % {:mapAs "object" :keywordAs "string"}))
                  (.then entry->record)
                  (.then #(.addCustomRecord index %)))))))
    ;; Save the index:
    (js-await (.writeFiles index {:outputPath "pagefind"}))
    (println "Index written with" (count edn-files) "entries")
    (js-await (.close pagefind))))
53+
54+
(js-await (index-all))

package.json

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"name": "clj-tumblr-summarizer",
3+
"version": "1.0.0",
4+
"description": "A tool / library that progressively backs up posts from a Tumblr.com micro-blog and can produce monthly summaries of the published posts in HTML. Can be run as a scheduled GitHub Action.",
5+
"main": "index.js",
6+
"scripts": {
7+
"test": "echo \"Error: no test specified\" && exit 1"
8+
},
9+
"keywords": [],
10+
"author": "",
11+
"license": "ISC",
12+
"dependencies": {
13+
"edn-data": "^1.0.0",
14+
"pagefind": "^1.0.3",
15+
"squint-cljs": "^0.3.35"
16+
}
17+
}

search.html

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Search entries</title>
  <!-- Pagefind UI assets, served from /pagefind/ -->
  <link href="/pagefind/pagefind-ui.css" rel="stylesheet">
  <script src="/pagefind/pagefind-ui.js"></script>
</head>
<body>
  <h1>Search entries</h1>
  <!-- Mount point for the Pagefind search UI -->
  <div id="search"></div>
  <script>
    // Create the search UI once the DOM (including #search) is available.
    window.addEventListener('DOMContentLoaded', (event) => {
      new PagefindUI({
        element: "#search",
        showSubResults: true,
        excerptLength: 100,
        showEmptyFilters: false,
        baseUrl: "/",
        processResult: function (result) {
          result.url = result.url.substring(1); // drop the leading / that pagefind adds
          return result;
        }
      });
    });
  </script>
</body>
</html>

0 commit comments

Comments
 (0)