Last active
July 4, 2024 09:02
-
-
Save eddyb/5cba94f9fb6cea5bc28d1a5a7c137d59 to your computer and use it in GitHub Desktop.
Matrix offline regex search (using JSON exports from Element)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env fish | |
# FIXME(eddyb) get this working in bash! | |
#
# Offline regex search over a zstd-compressed JSONL dump of Matrix events,
# rendered as an HTML table and opened in a browser.
#
# NOTE(review): every line below ends in ` | |` (` |` on the last one). That
# looks like table residue from copying the script off a web page; text after
# a line-continuation backslash would break the command, so confirm and strip
# it before running.
#
# How the script is wired together
# --------------------------------
# The whole file is ONE fish command: `bash -c <driver> bash <jq-program>
# <options...> $argv`.
#
# * The driver runs `set -eu`, then evals whatever
#   `jq -nr "$1" --args -- "${@:2}"` prints, where `$1` is the jq program on
#   the following line and `${@:2}` are all remaining arguments.
# * That jq program reads `$ARGS.positional` and splits it at the first `--`:
#     - before it, every argument starting with `--` is split at its first
#       `=` into NAME and VALUE (leading `--` removed from NAME);
#     - every other argument, and everything after the `--`, is collected
#       under the name `ARGS`.
#   Values are grouped per name, in order, and printed as bash array
#   assignments `NAME=('v1' 'v2' ...)` (quoted with `@sh`), followed by a
#   final line `eval "$top_cmd"`.
# * Repeating an option (e.g. the several `--begin=` below) therefore appends
#   to the same array. fish brace expansion, `--name={a,b,c}`, is used for
#   `--jq_validate_re_prefilter` and `--jq` to pass several elements in one go.
# * Options named `--#_...` turn into lines beginning with `#`, which bash
#   reads as comments; this is how the alternative `matrix_url_prefix`
#   values are kept around but disabled.
# * `$argv` (the arguments given to this fish script) is appended last, so it
#   goes through the same parser: one bare argument is the search regex, and
#   further `--name=value` arguments add to the arrays.
#
# Stages (all are bash snippets run through `eval`)
# -------------------------------------------------
# top_cmd  runs `begin`, then the pipeline `in | process | out`, then `end`.
# begin    - exports `regex` from `$ARGS` and requires exactly one element in
#            `ARGS`, otherwise prints an "ERR:" message and exits 1;
#          - sets `re_prefilter` from the `jq_validate_re_prefilter` run,
#            falling back to an empty string when that jq invocation fails;
#          - exports `mx_url_prefix` from `matrix_url_prefix`;
#          - creates the output file with `mktemp --suffix=.html` into `html`.
# in       decompresses "$messages_jsonl_zst" with `zstd -d`.
#          NOTE(review): `messages_jsonl_zst` is not assigned anywhere in this
#          script and `set -u` is active; presumably it comes from the
#          environment or from a `--messages_jsonl_zst=...` argument - confirm.
# process  `rg` prefilter on the raw lines (context `${context:-0}`, separator
#          `-SNIP-`), then `jq` turns each line into an HTML row written with
#          `--raw-output0`, then `rg --null-data` keeps rows matching
#          `re_postfilter` (plus context, separated by `table_separator`).
# out      wraps stdin in `<style>$css</style><table>...</table>` and writes
#          it to "$html".
# end      opens "$html" with `xdg-open`, else `${BROWSER:-firefox}`.
#
# jq_validate_re_prefilter
# ------------------------
# Tokenises `$ENV.regex` with `$re_scan` (a non-backslash character, or a
# backslash followed by any character). If the tokens do not join back to the
# original regex it aborts with a "BUG:" message. If any token fails
# `$re_allow_for_json` it prints a "WARN:" message via `halt_error`, which
# makes the `|| echo` fallback in `begin` leave `re_prefilter` empty.
# Otherwise it prints the regex unchanged.
#
# jq (row formatter)
# ------------------
# Reads raw lines (`-R`). `-SNIP-` separator lines pass through unchanged;
# all other lines are parsed with `fromjson`. A row gets `class=match` when
# `.type == "m.room.message"` and `.content.body` matches `$ENV.regex`.
# Each row carries a link built from `$ENV.mx_url_prefix`, `.room_id` and
# `.event_id`, the local time derived from `.origin_server_ts / 1e3`, the
# sender with a leading `@` and trailing `:matrix.org` removed, and the
# message: `.content.formatted_body` when it is a non-empty string, else the
# HTML-escaped `.content.body`. Events of other types are shown as a
# `<pre>` dump of `.type` and the entries of `.content`.
bash -c 'set -eu; eval "$(jq -nr "$1" --args -- "${@:2}")"' bash \ | |
'$ARGS.positional | ((index("--") // length) as $i | [.[:$i],.[$i+1:]]) as [$opts, $rest] | [($opts[] | ((select(startswith("--")) | ((index("=") // length) as $j | [.[:$j],.[$j+1:]]) as [$name, $value] | {($name | sub("^--";"")):$value})//{"ARGS":.})), ($rest[] | {"ARGS":.})] | map(to_entries[]) | reduce .[] as $e ({};.[$e.key]+=[$e.value]) | to_entries | map("\(.key)=\(.value | @sh"(\(.))")")[], "eval \"$top_cmd\""' \ | |
--top_cmd='eval "${begin[@]}"; eval "${in[@]}" | eval "${process[@]}" | eval "${out[@]}"; eval "${end[@]}"' \ | |
--in='zstd -d < "$messages_jsonl_zst"' \ | |
--process='rg -C "${context:-0}" --context-separator=-SNIP- -- "$re_prefilter" | jq --raw-output0 "${jq[@]}" | rg --null-data -C "${context:-0}" --context-separator="$table_separator" -- "$re_postfilter"' \ | |
--begin='export regex="$ARGS"; [ ${#ARGS[@]} -eq 1 ] || (echo "ERR: expected 1 argument (search regex), found ${#ARGS[@]}: ${ARGS[@]}"; exit 1);' \ | |
--begin='re_prefilter="$(jq "${jq_validate_re_prefilter[@]}" || echo)";' \ | |
--jq_validate_re_prefilter={'-nr','$ENV.regex | (([scan($re_scan)] | (select(join("")==$ENV.regex)//(@json"BUG: incomplete regex scan\noriginal: \($ENV.regex)\n scanned: \(join(""))\n" | halt_error)) | map({show:gsub("\n";""),allow:test($re_allow_for_json)}) | (select(all(.allow) | not) | "WARN: potentially JSON-unstable regex, disabling prefilter optimization...\noriginal regex: `\(map(.show)|join(""))`\n unsupported: `\(map(if .allow then .show | gsub(".";" ") else .show end)|join(""))`\n" | halt_error)) // .)', \ | |
--arg,re_scan,"[^\\\\]|\\\\.", \ | |
--arg,re_allow_for_json,"\A([ !#%-\-/-Z_-~]|\\\\[\$()*+\.?\[\]^{|}])\z"} \ | |
--jq={'-R','select(. == "-SNIP-")//(fromjson | ((select(.type == "m.room.message") | .content.body | strings | test($ENV.regex))//false) as $match | "\($ENV.mx_url_prefix)/\(.room_id)/\(.event_id)" as $url | (.origin_server_ts | ./1e3 | [strflocaltime("%Y-%m-%d %H:%M:%S"), strflocaltime("%H:%M")]) as [$datetime, $time] | (.sender | sub("^@";"") | sub(":matrix.org$";"")) as $sender | "<tr\((" class=match"|select($match))//"")><td class=time><a href=\\"\($url)\\" title=\\"\($datetime)\\">\($time)</a></td><td class=user><b>\($sender)</b></td><td class=msg>\((select(.type == "m.room.message") | ((.content.formatted_body | strings | select(length > 0)) // @html"\(.content.body | strings)")) // @html"<pre style=\\"color:crimson;font-size:0.8em\\">\("\(.type) {\n\(.content | to_entries | map(@json" \(.key): \(.value)") | join("\n"))\n}")</pre>")</td></tr>")'} \ | |
--begin='export mx_url_prefix="$matrix_url_prefix";' \ | |
--re_postfilter='^<tr class=match>' \ | |
--table_separator='<tr class=sep><td colspan=3></td></tr>' \ | |
--out='(echo "<style>$css</style><table>"; cat; echo "</table>") > "$html"' \ | |
--begin='html="$(mktemp --suffix=.html)"' \ | |
--css='html{background:#15191e;color:#ffffff}tr.sep>td{border-top:1px solid}tr:not(.match,.sep){opacity:0.4}a:not(:hover){text-decoration:none}a,a:visited{color:#238cf5}.time>a{color:#b9bec6}.user{text-align:right}.msg{padding:0.5em;border-radius:0.5em}tr:hover>.msg{background:#21262c}code{background:#2a3039;border-radius:0.2em}' \ | |
--end='xdg-open "$html" || "${BROWSER:-firefox}" "$html"' \ | |
--#_='Remove #_ from/keep only the variant you want to use' \ | |
--#_matrix_url_prefix='https://app.element.io/#/room' \ | |
--matrix_url_prefix='https://develop.element.io/#/room' \ | |
--#_matrix_url_prefix='matrix:r' \ | |
$argv |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Rough usage guidelines:
$HOME/logs
"type":"m.room.member"
for whoever created the room.jsonl.zst
(one JSON object per line + zstd compression):.messages.jsonl.zst
file: