Skip to content

Instantly share code, notes, and snippets.

@chrismwendt
Last active October 11, 2021 21:08
Show Gist options
  • Save chrismwendt/0109758559e537c8cbd9bb67d7b3be57 to your computer and use it in GitHub Desktop.
Script for running SPARQL queries against LSIF dumps for debugging (rdflib and oxigraph)
#!/usr/bin/env python3
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import Terminal256Formatter
from pprint import pformat
import os
import json
from prettytable import PrettyTable
from colored import stylize, fg
from pyoxigraph import MemoryStore, NamedNode, NamedNode, Literal, Quad
import time
import urllib
import re
# oxigraph is annoying to use because all values need to be valid URIs, so
# you end up having to encode/decode arbitrary data into URIs.
# Also, I'm not sure if it supports aggregations.
def main():
    """Load an LSIF dump into an oxigraph MemoryStore and run SPARQL queries against it.

    Debugging aid: every value is percent-encoded into a dummy 'scheme:' URI
    because oxigraph requires all nodes to be valid IRIs.
    """
    # Path is hard-coded to the author's machine; adjust as needed.
    os.chdir('/Users/chrismwendt/github.com/sourcegraph/scratch')
    store = MemoryStore()

    def encode(v):
        # Arbitrary values must become valid URIs for oxigraph.
        return 'scheme:' + urllib.parse.quote(str(v))

    def decode(v):
        return urllib.parse.unquote(v.removeprefix('scheme:'))

    def l(v):
        # URI form suitable for splicing directly into SPARQL query text.
        return '<scheme:' + urllib.parse.quote(str(v)) + '>'

    def nn(v):
        return NamedNode(encode(v))

    def add(a, b, c):
        store.add(Quad(nn(a), nn(b), nn(c)))

    def query(q):
        """Run a SPARQL query (with ':name' shorthand) and print the results as a table."""
        print('before', q)
        # Rewrite ':name' shorthand into full encoded URIs.
        q = re.sub(r':(\w+)', lambda match: l(match.group(1)), q)
        print('after ', q)
        start = time.time()
        results = store.query(q)
        print(time.time() - start)
        table = PrettyTable()
        table.field_names = [v for v in results.variables]
        for row in results:
            # Distinct loop variable (the original reused the table's name here).
            table.add_row([decode(cell.value) if cell is not None else "NONE" for cell in row])
        table.align = "l"
        print(table)

    with open('/Users/chrismwendt/github.com/sourcegraph/scratch/dump.lsif') as dump_file:
        # Iterate the file lazily instead of materializing all lines.
        for line in dump_file:
            el = json.loads(line.rstrip())
            # Normalize LSIF edge fields: outV -> from, inV/inVs -> to.
            if el.get('outV') is not None:
                el['from'] = el.pop('outV')
            if el.get('inV') is not None:
                el['inVs'] = [el.pop('inV')]
            if el.get('inVs') is not None:
                el['to'] = el.pop('inVs')
            element_id = el.pop('id')  # avoid shadowing the builtin `id`
            for k, v in flatten(el):
                add(element_id, k, v)

    print(stylize("vertex -edge-> vertex", fg("green")))
    query("""
    SELECT ("sup" AS ?foo) {
        ?id1 :label ?l1 .
    }
    """)
def flatten(d):
    """Flatten a nested dict into a list of [dotted_key, scalar] pairs.

    Nested dicts contribute 'parent.child' keys; lists contribute one pair
    per element under the same key; all other values pass through as-is.
    """
    pairs = []
    for k, v in d.items():
        # isinstance instead of `type(v) is ...` so dict/list subclasses
        # (e.g. OrderedDict, defaultdict) are flattened too.
        if isinstance(v, dict):
            for sub_key, sub_val in flatten(v):
                pairs.append([f"{k}.{sub_key}", sub_val])
        elif isinstance(v, list):
            for item in v:
                pairs.append([k, item])
        else:
            pairs.append([k, v])
    return pairs
def flat_list(l):
    """Concatenate a list of lists into one flat list."""
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
def p(obj):
    """Pretty-print *obj* to the terminal with Python syntax highlighting."""
    formatted = pformat(obj)
    colored_text = highlight(formatted, PythonLexer(), Terminal256Formatter())
    print(colored_text)
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
#!/usr/bin/env python3
from rdflib import Graph, Literal
from rdflib.namespace import Namespace
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import Terminal256Formatter
from pprint import pformat
import os
import json
from prettytable import PrettyTable
from colored import stylize, fg
def main():
    """Load an LSIF dump into an rdflib Graph and run debugging SPARQL queries."""
    # Path is hard-coded to the author's machine; adjust as needed.
    os.chdir('/Users/chrismwendt/github.com/sourcegraph/scratch')
    g = Graph()
    n = Namespace('')
    g.bind("", n)

    def add(a, b, c):
        # Subject/object are stored as Literals; predicates live in the
        # empty-prefix namespace so queries can say `:label`, `:from`, etc.
        g.add([Literal(a), n[b], Literal(c)])

    def query(q):
        """Run a SPARQL query and print the result set as an aligned table."""
        # Fix: the original evaluated g.query(q) twice (once for the headers,
        # once for the rows); reuse the single result set instead.
        results = g.query(q)
        table = PrettyTable()
        table.field_names = [v for v in results.vars]
        for row in results:
            table.add_row(row)
        table.align = "l"
        print(table)

    with open('dump-before-doc-fix.lsif') as dump_file:
        # Iterate the file lazily instead of materializing all lines.
        for line in dump_file:
            el = json.loads(line.rstrip())
            # Normalize LSIF edge fields: outV -> from, inV/inVs -> to.
            if el.get('outV') is not None:
                el['from'] = el.pop('outV')
            if el.get('inV') is not None:
                el['inVs'] = [el.pop('inV')]
            if el.get('inVs') is not None:
                el['to'] = el.pop('inVs')
            element_id = el.pop('id')  # avoid shadowing the builtin `id`
            for k, v in flatten(el):
                add(element_id, k, v)

    print(stylize("vertex -edge-> vertex", fg("green")))
    query("""
    SELECT DISTINCT (CONCAT(?l1, " ", ?el, " ", ?l2) as ?v_e_v) {
        ?id1 :label ?l1 .
        ?id2 :label ?l2 .
        ?e :from ?id1 .
        ?e :label ?el .
        ?e :to ?id2 .
    }
    """)

    print(stylize("distinct :type + :label for elements with :document", fg("green")))
    query("""
    SELECT DISTINCT (CONCAT(?type, ".", ?l) as ?type_label) {
        ?id1 :type ?type .
        ?id1 :label ?l .
        ?id1 :document ?_ .
    }
    """)

    print(stylize("distinct :label for nodes that have outgoing edges with :document", fg("green")))
    query("""
    SELECT DISTINCT ?l {
        ?n1 :label ?l .
        ?e1 :from ?n1 .
        ?e1 :document ?_ .
    }
    """)

    print(stylize("docs", fg("green")))
    query("""
    SELECT DISTINCT ?n1 ?uri {
        ?n1 :label "document" .
        ?n1 :uri ?uri .
    }
    """)

    print(stylize("contains", fg("green")))
    query("""
    SELECT DISTINCT ?n1 (GROUP_CONCAT(?contains) as ?contains) {
        ?e1 :label "contains" .
        ?e1 :from ?n1 .
        ?e1 :to ?contains .
    }
    GROUP BY ?n1
    """)

    print(stylize("edges with doc -> ranges", fg("green")))
    query("""
    SELECT DISTINCT ?doc (GROUP_CONCAT(?range) as ?ranges) {
        ?e1 :document ?doc .
        ?e1 :to ?range .
    }
    GROUP BY ?doc
    """)

    print(stylize("ranges and their docs", fg("green")))
    query("""
    SELECT DISTINCT ?range (GROUP_CONCAT(DISTINCT ?doc) as ?docs) {
        ?e1 :document ?doc .
        ?e1 :to ?range .
    }
    GROUP BY ?range
    """)
def flatten(d):
    """Flatten a nested dict into a list of [dotted_key, scalar] pairs.

    Nested dicts contribute 'parent.child' keys; lists contribute one pair
    per element under the same key; all other values pass through as-is.
    """
    pairs = []
    for k, v in d.items():
        # isinstance instead of `type(v) is ...` so dict/list subclasses
        # (e.g. OrderedDict, defaultdict) are flattened too.
        if isinstance(v, dict):
            for sub_key, sub_val in flatten(v):
                pairs.append([f"{k}.{sub_key}", sub_val])
        elif isinstance(v, list):
            for item in v:
                pairs.append([k, item])
        else:
            pairs.append([k, v])
    return pairs
def flat_list(l):
    """Concatenate a list of lists into one flat list."""
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
def p(obj):
    """Pretty-print *obj* to the terminal with Python syntax highlighting."""
    formatted = pformat(obj)
    colored_text = highlight(formatted, PythonLexer(), Terminal256Formatter())
    print(colored_text)
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment