Skip to content

Instantly share code, notes, and snippets.

@rlskoeser
Last active March 29, 2016 21:14
Show Gist options
  • Save rlskoeser/f33f723865a19c7aaf2f to your computer and use it in GitHub Desktop.
Save rlskoeser/f33f723865a19c7aaf2f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# fedora 4 test client
# NOTE: this is experimental / spike code!!
# (written to get a sense of the fedora 4 LDP API)
import requests
import rdflib
# RDF namespaces used to build the predicates and datatype URIs below.
DC = rdflib.Namespace('http://purl.org/dc/elements/1.1/')
# NOTE: the namespace URI must match the W3C one exactly (two slashes after
# the scheme); otherwise XMLSchema.string etc. name an unknown datatype.
XMLSchema = rdflib.Namespace('http://www.w3.org/2001/XMLSchema#')
LDP = rdflib.Namespace('http://www.w3.org/ns/ldp#')
FEDORA4 = rdflib.Namespace('http://fedora.info/definitions/v4/repository#')
# rdf descriptors for easy access to get/set rdf values
class Value(object):
    """A data descriptor that gets, sets or deletes a single rdf value.

    The owning object must provide ``uriref`` (the subject) and three
    rdflib graphs: ``graph`` (current state), ``graph_updates`` (triples
    to insert on save) and ``graph_deletes`` (triples to delete on save).

    :param predicate: rdf predicate this descriptor reads and writes
    :param datatype: optional datatype URI; used to type new literals
        and to convert stored values to python
    :param normalize: when true, collapse runs of whitespace in string
        values returned by the descriptor
    """
    # break out into types by xmlschema type, like eulxml?
    # StringField, DateField, etc. then conversion is easy...
    # but do we always know what a field will contain?

    def __init__(self, predicate, datatype=None, normalize=False):
        self.predicate = predicate
        self.datatype = datatype
        self.normalize = normalize

    def __get__(self, obj, objtype=None):
        # class-level access returns the descriptor itself
        if obj is None:
            return self
        return self.to_python(obj.graph.value(obj.uriref, self.predicate))

    def __set__(self, obj, value):
        """Replace the current value and record the change for save."""
        # TODO: do we need to check that subject matches self.object_type (if any)?
        if value == self.__get__(obj):
            print('no change, returning')
            return

        # convert the python value into an rdf term
        if isinstance(value, rdflib.URIRef):
            node = value
        elif hasattr(value, 'uriref'):
            node = value.uriref
        elif self.datatype:
            node = rdflib.Literal(value, datatype=self.datatype)
        else:
            print('rdf literal')
            node = rdflib.Literal(value, datatype=XMLSchema.string)

        # update requires delete/insert, so make a note of what we're replacing
        old_objs = list(obj.graph.objects(obj.uriref, self.predicate))
        if old_objs:
            # FIXME: doesn't match if one has datatype and the other doesn't
            print('old objs =  %r  new uri =  %r' % (old_objs[0], node))
            print(old_objs[0].__class__)
            if old_objs[0] == node:
                print('no change, returning')
                return
            # graph.set() below drops every existing value locally, so mark
            # all of them. add() (not set()) keeps deletions recorded by an
            # earlier assignment, so the value in the repository still goes.
            for old in old_objs:
                print('marking  %s  for deletion' % (old,))
                obj.graph_deletes.add((obj.uriref, self.predicate, old))

        new_triple = (obj.uriref, self.predicate, node)
        obj.graph.set(new_triple)
        obj.graph_updates.set(new_triple)

    def __delete__(self, obj):
        """Remove the value(s) and record the deletion for save."""
        old_objs = list(obj.graph.objects(obj.uriref, self.predicate))
        for old in old_objs:
            # FIXME: doesn't match if one has datatype and the other doesn't
            print('marking  %s  for deletion' % (old,))
            obj.graph_deletes.add((obj.uriref, self.predicate, old))
        if old_objs:
            obj.graph.remove((obj.uriref, self.predicate, None))
            # a value that was set and then deleted must not be inserted on save
            obj.graph_updates.remove((obj.uriref, self.predicate, None))

    def to_python(self, val):
        """Convert an rdf term to a python value; ``None`` passes through."""
        # NOTE: could check for and use val.toPython() ...
        # if we got a 'none' return as is (don't convert to "None")
        if val is None:
            return val
        if self.datatype is not None:
            val = rdflib.Literal(val, datatype=self.datatype).toPython()
        elif isinstance(val, rdflib.Literal):
            val = val.toPython()
        # NOTE: doesn't convert rdflib.uriref; should it?
        if self.normalize:
            val = self._normalize_whitespace(val)
        return val

    @staticmethod
    def _normalize_whitespace(val):
        """Collapse whitespace runs in text to single spaces and trim the ends.

        Non-text values (numbers, dates, ...) are returned unchanged.
        """
        # (unicode, str) on python 2; (str, str) on python 3
        if isinstance(val, (type(u''), str)):
            return ' '.join(val.split())
        return val
class ValueList(Value):
    """Data descriptor that reads every rdf value stored for a predicate.

    Takes the same construction arguments as :class:`Value`.
    """

    def __init__(self, predicate, datatype=None, normalize=False):
        # same three attributes as the base class, so let it store them
        super(ValueList, self).__init__(predicate, datatype=datatype,
                                        normalize=normalize)

    def __get__(self, obj, objtype=None):
        # class-level access hands back the descriptor itself
        if obj is None:
            return self
        nodes = obj.graph.objects(obj.uriref, self.predicate)
        return [self.to_python(node) for node in nodes]
class ResourceList(object):
    """Descriptor that returns the objects of a predicate as resources.

    :param predicate: rdf predicate whose objects are the related resources
    :param resource_type: callable used to build each resource from the
        object term, or the string ``'self'`` to use the class of the
        owning instance
    """

    def __init__(self, predicate, resource_type):
        self.predicate = predicate
        self.resource_type = resource_type

    def __get__(self, obj, objtype=None):
        # objtype defaults to None, as on the other descriptors, so the
        # method can also be called directly with just the instance
        if obj is None:
            return self
        if self.resource_type == 'self':
            resource_type = obj.__class__
        else:
            resource_type = self.resource_type
        return [resource_type(o)
                for o in obj.graph.objects(obj.uriref, self.predicate)]
class DublinCoreMixin(object):
    """Mixin exposing selected Dublin Core elements as single rdf values."""

    dc_title = Value(DC.title)
    dc_creator = Value(DC.creator)
    dc_publisher = Value(DC.publisher)
class FedoraMixin(object):
    """Mixin exposing properties from the Fedora 4 repository namespace."""

    # creation / modification
    created = Value(FEDORA4.created)
    created_by = Value(FEDORA4.createdBy)
    last_modified = Value(FEDORA4.lastModified)
    last_modified_by = Value(FEDORA4.lastModifiedBy)

    # identifiers
    uuid = Value(FEDORA4.uuid)

    # frozen (version snapshot) properties
    frozen_uuid = Value(FEDORA4.frozenUuid)
    frozen_primary_types = Value(FEDORA4.frozenPrimaryTypes)
    frozen_mixin_types = Value(FEDORA4.frozenMixinTypes)

    # fedora-specific things would go here, e.g.
    # versions
    # fixity
    # transform
# repo object: support transactions, define transforms (?)
class LdpResource(DublinCoreMixin, FedoraMixin):
    """An LDP RDF resource, with pending changes tracked for patching.

    ``graph`` holds the current rdf; ``graph_updates`` and
    ``graph_deletes`` hold the triples that :meth:`save` will insert and
    delete.

    :param uri: URI of an existing resource; when given, it is requested
        as RDF/XML at construction time and parsed into ``graph``
    :param container: URI that :meth:`create` posts the new resource to
    """
    rdf_types = ValueList(rdflib.RDF.type)
    members = ResourceList(LDP.contains, 'self')

    def __init__(self, uri=None, container=None):  # optional when creating new...
        self.container = container
        self.uri = uri
        # maybe a dynamic resource that figures out what kind of thing it is
        # at initialization? make a head request and then initialize
        # as appropriate type (passing in data to avoid repeating the request)
        # need requests to do content-negotiation, last modified, etc
        # load on demand
        self.graph = rdflib.Graph()
        # create graphs to hold changes so we can patch just the changes
        self.graph_updates = rdflib.Graph()
        self.graph_deletes = rdflib.Graph()

        if self.uri is not None:
            response = requests.get(self.uri,
                                    headers={"Accept": 'application/rdf+xml'})
            print(response.headers)
            # NOTE: the link header says what kind of thing this is,
            # e.g. for container:
            #   <http://www.w3.org/ns/ldp#Resource>;rel="type",
            #   <http://www.w3.org/ns/ldp#Container>;rel="type",
            #   <http://www.w3.org/ns/ldp#BasicContainer>;rel="type"
            # or for binary:
            #   <http://www.w3.org/ns/ldp#Resource>;rel="type",
            #   <http://www.w3.org/ns/ldp#NonRDFSource>;rel="type",
            #   <.../fcr:metadata>; rel="describedby"

            # don't try to load as rdf if it isn't rdf! Compare only the
            # media type: the header may carry parameters (e.g. charset)
            # or be absent altogether.
            content_type = response.headers.get('content-type', '')
            media_type = content_type.split(';')[0].strip().lower()
            if media_type == 'application/rdf+xml':
                # name the format explicitly rather than rely on the
                # rdflib default
                self.graph.parse(data=response.content, format='xml')

    @property
    def uriref(self):
        """The resource URI as an rdflib URIRef (empty URIRef when unset)."""
        if self.uri is not None:
            return rdflib.URIRef(self.uri)
        return rdflib.URIRef('')

    def create(self):
        """POST the current graph to ``container`` as a new resource.

        On a 201 response ``uri`` is set from the Location header.
        """
        # param opt
        #   CHECKSUM (Optional) SHA-1 checksum checked against the upload
        # headers
        #   CONTENT-DISPOSITION (Optional) filename is stored in ebucore:filename
        #   CONTENT-TYPE (Optional) MIME type of the uploaded binary or RDF
        #   CONTENT-LOCATION (Optional) URI to use instead of the request body
        #   SLUG - suggested name for new resource
        # If the MIME type is a supported RDF format or SPARQL-Update, the
        # content is parsed as RDF with the current resource as base URI
        # (<> expands to the current URI); namespaces must be declared in
        # full. Other MIME types create a binary resource.

        # serialize once, in the format the content-type header declares
        rdf_xml = self.graph.serialize(format='xml')
        print(rdf_xml)
        response = requests.post(self.container,
                                 headers={'content-type': 'application/rdf+xml'},
                                 data=rdf_xml)
        if response.status_code == requests.codes.created:
            # also returns etag
            # NOTE(review): triples added before this point use the empty
            # URIRef as subject and are not re-keyed to the new uri here
            self.uri = response.headers['location']
        print(response)
        print(response.content)

    def save(self):
        """PATCH pending deletions, then pending insertions, as SPARQL update.

        Each kind of change is sent as its own request and only when there
        is something to send; changes are cleared once the repository
        accepts them, so a later save does not send them again.
        """
        # for now, assuming patch update of rdf properties
        if not self.graph_deletes and not self.graph_updates:
            print('no deletes or updates, returning')
            return

        # deletes go first so replaced values are gone before the insert
        response = self._patch('DELETE DATA', self.graph_deletes)
        if response is not None:
            print(response)

        response = self._patch('INSERT DATA', self.graph_updates)
        if response is not None:
            print(response.content)

    def _patch(self, operation, pending):
        """Send the triples in *pending* for this resource as one update.

        :param operation: SPARQL update keyword(s), e.g. ``'DELETE DATA'``
        :param pending: rdflib graph of triples to send
        :returns: the response, or ``None`` when there was nothing to send
        """
        triples = ['<> <%s> %s' % (pred, self._sparql_term(obj))
                   for pred, obj in pending.predicate_objects(self.uriref)]
        if not triples:
            return None

        sparql = '%s\n{ %s }\n' % (operation, ' .\n'.join(triples))
        print(sparql)
        # encode explicitly so non-ascii values are sent as utf-8
        response = requests.patch(self.uri, sparql.encode('utf-8'),
                                  headers={"Content-Type": 'application/sparql-update'})
        if response.ok:
            # accepted: these changes are no longer pending
            pending.remove((None, None, None))
        return response

    @staticmethod
    def _sparql_term(node):
        """Render an rdf term as it should appear in a SPARQL update."""
        # URIs are written as <uri>, not as quoted strings
        if isinstance(node, rdflib.URIRef):
            return '<%s>' % node
        # NOTE: as before, datatype and language are not emitted, so a typed
        # non-string value may not match on delete
        text = node
        for char, escaped in (('\\', '\\\\'), ('"', '\\"'),
                              ('\n', '\\n'), ('\r', '\\r')):
            text = text.replace(char, escaped)
        return '"%s"' % text
class NonRdfResource(object):
    """A binary (non-rdf) resource to be created inside a container.

    Set ``content``, and optionally ``filename`` and ``content_type``,
    before calling :meth:`create`.

    :param uri: URI of the resource, if it already exists
    :param container: URI that :meth:`create` posts the content to
    """
    content = None
    checksum = None  # sha-1 checksum (not yet sent by create)
    filename = None
    content_type = None

    def __init__(self, uri=None, container=None):
        # same as ldp resource
        self.container = container
        self.uri = uri

    def _request_headers(self):
        """Build the POST headers from ``content_type`` and ``filename``."""
        headers = {}
        if self.content_type:
            headers['Content-Type'] = self.content_type
        if self.filename:
            # the filename must be sent as a parameter of a disposition
            # type, not as the bare header value
            escaped = self.filename.replace('\\', '\\\\').replace('"', '\\"')
            headers['Content-Disposition'] = \
                'attachment; filename="%s"' % escaped
        return headers

    def create(self):
        """POST ``content`` to ``container`` as a new resource.

        On a 201 response ``uri`` is set from the Location header.
        """
        headers = self._request_headers()
        print('headers =  %s' % (headers,))
        response = requests.post(self.container,
                                 headers=headers, data=self.content)
        if response.status_code == requests.codes.created:
            # also returns etag
            self.uri = response.headers['location']
        print(response)
        print(response.content)
# interact with existing cover object created in fedora4 tutorial
cover = LdpResource('http://localhost:8080/fcrepo/rest/cover')
print(cover.dc_publisher)
print(cover.dc_title)
for t in cover.rdf_types:
    print(t)

# change one property and remove another, then patch the changes
cover.dc_publisher = 'emory'
del cover.dc_title
print(cover.dc_publisher)
print(cover.dc_title)
print(cover.graph_updates.serialize())
cover.save()

print('created  %s' % (cover.created,))
print('last modified  %s' % (cover.last_modified,))
print('created by %s' % (cover.created_by,))
for m in cover.members:
    print(m.uri)

# create a new object within the cover container
# newobj = LdpResource(container='http://localhost:8080/fcrepo/rest/cover')
# newobj.dc_title = 'create new'
# newobj.create()

# create a non-rdf resource within the cover container
newfile = NonRdfResource(container='http://localhost:8080/fcrepo/rest/cover')
# open in binary mode: the pdf must be uploaded byte-for-byte
with open('/Users/rsutton/Downloads/networks-book.pdf', 'rb') as pdf:
    newfile.content = pdf
    newfile.filename = 'networks-book.pdf'
    newfile.content_type = 'application/pdf'
    # upload while the file is still open; the handle is the request body
    newfile.create()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment