Last active
December 1, 2017 03:59
-
-
Save evren/5612900 to your computer and use it in GitHub Desktop.
Example of using Stardog for data validation for the example described at http://www.w3.org/2012/12/rdf-val/SOTA.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stardog commands and the output for RDF validation example | |
# First create the Stardog database and load data | |
$ ./stardog-admin db create -n sota sota-data.ttl | |
Bulk loading data to new database. | |
Loading data completed...Loaded 25 triples in 00:00:00 @ 0.4K triples/sec. | |
Successfully created database 'sota'. | |
# Then add the constraints to the database | |
$ ./stardog-admin icv add sota sota-constraints.ttl | |
Successfully added constraints in 00:00:00. | |
# Now run the validation command | |
# This command just prints which constraints are violated | |
$ ./stardog icv validate sota | |
Data is NOT valid. | |
The following constraints were violated: | |
AxiomConstraint{:reportedOn rdfs:domain :Issue} | |
AxiomConstraint{:related rdfs:range :Issue} | |
AxiomConstraint{:Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing)} | |
AxiomConstraint{:reproducedBy rdfs:range foaf:Person} | |
AxiomConstraint{:reportedBy rdfs:range foaf:Person} | |
AxiomConstraint{:state rdfs:domain :Issue} | |
AxiomConstraint{:state rdfs:range :ValidState} | |
# Now run the explanation command to get details about violations | |
# We use the --merge option to group related violations together | |
# By default only one explanation is printed so we increase the limit to 10 | |
$ ./stardog icv explain --limit 10 --merge sota | |
VIOLATED :reportedOn rdfs:domain :Issue | |
ASSERTED :issue4 :reportedOn "x0" | |
NOT_INFERRED :issue4 a :Issue | |
1.1) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue4 | |
NOT_INFERRED :issue4 a :Issue | |
1.2) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue3 | |
NOT_INFERRED :issue3 a :Issue | |
1.3) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue2 | |
NOT_INFERRED :issue2 a :Issue | |
VIOLATED :Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing) | |
ASSERTED :issue7 :reportedBy :user2 | |
ASSERTED :issue7 a :Issue | |
ASSERTED :issue7 a owl:Thing | |
ASSERTED :issue7 :reportedBy :user6 | |
NOT_INFERRED :issue7 :reportedBy <tag:stardog:api:variable:x0> | |
VIOLATED :reproducedBy rdfs:range foaf:Person | |
ASSERTED :issue7 :reproducedBy :user1 | |
NOT_INFERRED :user1 a foaf:Person | |
VIOLATED :reportedBy rdfs:range foaf:Person | |
ASSERTED :issue7 :reportedBy :user6 | |
NOT_INFERRED :user6 a foaf:Person | |
VIOLATED :state rdfs:domain :Issue | |
ASSERTED :issue4 :state :unsinged | |
NOT_INFERRED :issue4 a :Issue | |
VIOLATED :state rdfs:range :ValidState | |
ASSERTED :issue4 :state :unsinged | |
NOT_INFERRED :unsinged a :ValidState | |
# We can also add SPARQL queries as constraints | |
$ ./stardog-admin icv add sota sota-query.sparql | |
# We can run validation with a mixture of OWL constraints and SPARQL constraints | |
$ ./stardog icv validate sota | |
Data is NOT valid. | |
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# OWL axioms describing the issue-tracking vocabulary of the SOTA example
# (http://www.w3.org/2012/12/rdf-val/SOTA). They declare the :Issue class,
# cardinality restrictions on its properties, and domain/range for each property.
@prefix owl: <http://www.w3.org/2002/07/owl#> . | |
@prefix : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> . | |
# NOTE(review): the foaf namespace IRI below ends with an apostrophe ("0.1/'"),
# which is not the published FOAF namespace (http://xmlns.com/foaf/0.1/).
# The example's data file and SPARQL query declare the same IRI, so the files
# agree with each other; if this is corrected, correct all of them together.
@prefix foaf: <http://xmlns.com/foaf/0.1/'> . | |
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . | |
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . | |
# Every :Issue has exactly one :state, :reportedBy and :reportedOn value.
# The owl:minCardinality 0 restrictions place no lower bound on :reproducedBy,
# :reproducedOn and :related; they only list those properties on the class.
:Issue a owl:Class ; | |
rdfs:subClassOf | |
[ owl:onProperty :state ; owl:cardinality 1 ] , | |
[ owl:onProperty :reportedBy ; owl:cardinality 1 ] , | |
[ owl:onProperty :reportedOn ; owl:cardinality 1 ] , | |
[ owl:onProperty :reproducedBy ; owl:minCardinality 0 ] , | |
[ owl:onProperty :reproducedOn ; owl:minCardinality 0 ] , | |
[ owl:onProperty :related ; owl:minCardinality 0 ] . | |
# Property declarations: each property has domain :Issue; the range is
# :ValidState, :Issue, foaf:Person or xsd:dateTime depending on the property.
:state a owl:ObjectProperty , | |
owl:FunctionalProperty ; rdfs:domain :Issue ; rdfs:range :ValidState . | |
:related a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range :Issue . | |
:reportedBy a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range foaf:Person . | |
:reportedOn a owl:DatatypeProperty ; rdfs:domain :Issue ; rdfs:range xsd:dateTime . | |
:reproducedBy a owl:ObjectProperty ; rdfs:domain :Issue ; rdfs:range foaf:Person . | |
:reproducedOn a owl:DatatypeProperty ; rdfs:domain :Issue ; rdfs:range xsd:dateTime . | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample instance data for the SOTA validation example
# (http://www.w3.org/2012/12/rdf-val/SOTA). The data is deliberately invalid:
# each intentional problem is called out by the inline comment on its line,
# so do not "fix" values such as :unsinged or the malformed tel: IRI.
@prefix : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> . | |
# NOTE(review): the foaf namespace IRI below ends with an apostrophe ("0.1/'"),
# which is not the published FOAF namespace (http://xmlns.com/foaf/0.1/).
# The example's constraints file and SPARQL query declare the same IRI, so the
# files agree with each other; if this is corrected, correct all of them together.
@prefix foaf: <http://xmlns.com/foaf/0.1/'> . | |
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . | |
# Relative IRIs such as <#issue7> resolve against this base, giving the same
# IRIs as the ":" prefix above (e.g. <#issue7> is :issue7).
@base <http://www.w3.org/2012/12/rdf-val/SOTA-ex#> . | |
<#issue7> a :Issue , :SecurityIssue ; | |
:state :unassigned ; | |
:reportedBy <#user6> , <#user2> ; # only one reportedBy permitted | |
:reportedOn "2012-12-31T23:57:00Z"^^xsd:dateTime ; | |
:reproducedBy <#user2>, <#user1> ; | |
:reproducedOn "2012-10-31T23:57:00Z"^^xsd:dateTime ; # reproduced before being reported | |
:related <#issue4>, <#issue3>, <#issue2> . # referenced issues not included | |
<#issue4> # a ??? - missing type arc | |
:state :unsinged ; # misspelled term in value set.# :reportedBy ??? - missing required property | |
:reportedOn "2012-12-31T23:57:00Z"^^xsd:dateTime . | |
<#user2> a foaf:Person ; | |
foaf:givenName "Alice" ; | |
foaf:familyName "Smith" ; | |
foaf:phone <tel:+1.555.222.2222> ; | |
foaf:mbox <mailto:alice@example.com> . | |
<#user6> a foaf:Agent ; # should be foaf:Person | |
foaf:givenName "Bob" ; # foaf:familyName "???" - missing required property | |
foaf:phone <tel:+.555.222.2222> ; # malformed tel: URL | |
foaf:mbox <mailto:alice@example.com> . | |
# The two members of the :ValidState value set used by the :state property.
:assigned a :ValidState . | |
:unassigned a :ValidState . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2010 - 2015, Clark & Parsia, LLC. <http://www.clarkparsia.com> | |
// For more information about licensing and copyright of this software, please contact | |
// inquiries@clarkparsia.com or visit http://stardog.com | |
package com.clarkparsia.pellet.examples; | |
import java.io.File; | |
import com.complexible.common.rdf.model.Namespaces; | |
import com.complexible.stardog.api.Connection; | |
import com.complexible.stardog.api.ConnectionConfiguration; | |
import com.complexible.stardog.api.admin.AdminConnection; | |
import com.complexible.stardog.api.admin.AdminConnectionConfiguration; | |
import com.complexible.stardog.icv.api.ICVConnection; | |
import com.complexible.stardog.reasoning.Proof; | |
import com.complexible.stardog.reasoning.ProofWriter; | |
import org.openrdf.rio.RDFFormat; | |
/** | |
* Example of using Stardog Integrity Constraint functionality for data validation example described at http://www.w3.org/2012/12/rdf-val/SOTA | |
* | |
* @author Evren Sirin | |
*/ | |
public class SOTAExample { | |
public static void main(String[] args) throws Exception { | |
if (args.length != 2) { | |
System.err.println("Usage: " + SOTAExample.class.getName() + " <data-file> <constraints-file>"); | |
System.exit(1); | |
} | |
// the db name | |
String sota = "sota"; | |
String dataFile = args[0]; | |
String constraintsFile = args[1]; | |
// first create a temporary database to use | |
// (if there is already a database with such a name, drop it first) | |
// Stardog should be running on the same machine locally for this example | |
AdminConnection aAdminConn = AdminConnectionConfiguration.toServer("snarl://localhost:5820").credentials("admin", "admin").connect(); | |
if (aAdminConn.list().contains(sota)) { | |
aAdminConn.drop(sota); | |
} | |
// Load the data in the db while creating it | |
ConnectionConfiguration aConfig = aAdminConn.memory(sota).create(new File(dataFile)); | |
// obtain a connection to the database | |
Connection aConn = aConfig.connect(); | |
// ok, we have a database, now need the validator | |
ICVConnection aValidator = aConn.as(ICVConnection.class); | |
// add the constraints, must do this in a transaction | |
aValidator.begin(); | |
aValidator.addConstraints().format(RDFFormat.TURTLE).file(new File(constraintsFile)); | |
aValidator.commit(); | |
// use namespaces to pretty print results | |
Namespaces aNamespaces = aValidator.namespaces(); | |
// check validity | |
boolean isValid = aValidator.isValid(); | |
// print validation result | |
System.out.format("Data is%s valid%n", isValid ? "" : " NOT"); | |
// if not valid print explanations | |
if (!isValid) { | |
Iterable<Proof> aViolationProofs = aValidator.explain().countLimit(10).mergeExplanations().proofs(); | |
for (Proof aProof : aViolationProofs) { | |
System.out.println(ProofWriter.toString(aNamespaces, aProof)); | |
} | |
} | |
// always close your connections when you're done | |
aConn.close(); | |
aAdminConn.close(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Bulk loading data to new database sota. | |
Creating indexes...finished in 00:00:00.001 | |
Loaded 25 triples to sota from 1 file(s) in 00:00:00.009 @ 2.8K triples/sec. | |
Successfully created database 'sota'. | |
Data is NOT valid | |
VIOLATED :reportedOn rdfs:domain :Issue | |
ASSERTED :issue4 :reportedOn "x0" | |
NOT_INFERRED :issue4 a :Issue | |
1.1) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue4 | |
NOT_INFERRED :issue4 a :Issue | |
1.2) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue3 | |
NOT_INFERRED :issue3 a :Issue | |
1.3) VIOLATED :related rdfs:range :Issue | |
ASSERTED :issue7 :related :issue2 | |
NOT_INFERRED :issue2 a :Issue | |
VIOLATED :Issue rdfs:subClassOf (:reportedBy exactly 1 owl:Thing) | |
ASSERTED :issue7 :reportedBy :user2 | |
ASSERTED :issue7 a :Issue | |
ASSERTED :issue7 a owl:Thing | |
ASSERTED :issue7 :reportedBy :user6 | |
NOT_INFERRED :issue7 :reportedBy <tag:stardog:api:variable:x0> | |
VIOLATED :reproducedBy rdfs:range foaf:Person | |
ASSERTED :issue7 :reproducedBy :user1 | |
NOT_INFERRED :user1 a foaf:Person | |
VIOLATED :reportedBy rdfs:range foaf:Person | |
ASSERTED :issue7 :reportedBy :user6 | |
NOT_INFERRED :user6 a foaf:Person | |
VIOLATED :state rdfs:domain :Issue | |
ASSERTED :issue4 :state :unsinged | |
NOT_INFERRED :issue4 a :Issue | |
VIOLATED :state rdfs:range :ValidState | |
ASSERTED :issue4 :state :unsinged | |
NOT_INFERRED :unsinged a :ValidState | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2010 - 2015, Clark & Parsia, LLC. <http://www.clarkparsia.com> | |
// For more information about licensing and copyright of this software, please contact | |
// inquiries@clarkparsia.com or visit http://stardog.com | |
package com.clarkparsia.pellet.examples; | |
import java.io.File; | |
import java.util.List; | |
import java.util.Set; | |
import com.complexible.common.iterations.Iteration; | |
import com.complexible.common.rdf.query.resultio.TextTableQueryResultWriter; | |
import com.complexible.stardog.StardogException; | |
import com.complexible.stardog.api.Connection; | |
import com.complexible.stardog.api.ConnectionConfiguration; | |
import com.complexible.stardog.api.admin.AdminConnection; | |
import com.complexible.stardog.api.admin.AdminConnectionConfiguration; | |
import com.complexible.stardog.icv.Constraint; | |
import com.complexible.stardog.icv.ConstraintFactory; | |
import com.complexible.stardog.icv.ConstraintViolation; | |
import com.complexible.stardog.icv.api.ICVConnection; | |
import com.google.common.collect.Sets; | |
import org.openrdf.query.BindingSet; | |
import com.google.common.base.Charsets; | |
import com.google.common.collect.Lists; | |
import com.google.common.io.Files; | |
import org.openrdf.query.TupleQueryResult; | |
import org.openrdf.query.impl.TupleQueryResultImpl; | |
import org.openrdf.query.resultio.QueryResultIO; | |
/** | |
* Example of using Stardog Integrity Constraint functionality for data validation example described at http://www.w3.org/2012/12/rdf-val/SOTA | |
* | |
* @author Evren Sirin | |
*/ | |
public class SOTAQueryExample { | |
public static void main(String[] args) throws Exception { | |
if (args.length != 2) { | |
System.err.println("Usage: " + SOTAQueryExample.class.getName() + " <data-file> <constraints-file>"); | |
System.exit(1); | |
} | |
// the db name | |
String sota = "sota"; | |
String dataFile = args[0]; | |
String constraintFile = args[1]; | |
// first create a temporary database to use | |
// (if there is already a database with such a name, drop it first) | |
// Stardog should be running on the same machine locally for this example | |
AdminConnection aAdminConn = AdminConnectionConfiguration.toServer("snarl://localhost:5820").credentials("admin", "admin").connect(); | |
if (aAdminConn.list().contains(sota)) { | |
aAdminConn.drop(sota); | |
} | |
// Load the data in the db while creating it | |
ConnectionConfiguration aConfig = aAdminConn.memory(sota).create(new File(dataFile)); | |
// obtain a connection to the database | |
Connection aConn = aConfig.connect(); | |
// ok, we have a database, now need the validator | |
ICVConnection aValidator = aConn.as(ICVConnection.class); | |
// read the SPARQL constraint from the file | |
Constraint aConstraint = ConstraintFactory.constraint(Files.toString(new File(constraintFile), Charsets.UTF_8)); | |
// validate the constraint | |
Iteration<ConstraintViolation<BindingSet>, StardogException> aViolations = aValidator.getViolationBindings(aConstraint); | |
// we should have a single violation since we validated a single constraint | |
Iteration<BindingSet, StardogException> aBindings = aViolations.next().getViolations(); | |
// there might be multiple different bindings in a constraint violation so we'll print them all | |
TupleQueryResult aResult = convertToQueryResult(aBindings); | |
QueryResultIO.write(aResult, TextTableQueryResultWriter.FORMAT, System.out); | |
// ALWAYS close iterations and connections when you're done with them! | |
aBindings.close(); | |
aViolations.close(); | |
aConn.close(); | |
aAdminConn.close(); | |
} | |
private static TupleQueryResult convertToQueryResult(Iteration<BindingSet, StardogException> theBindings) throws StardogException { | |
Set<String> aVars = Sets.newLinkedHashSet(); | |
List<BindingSet> aBindingsList = Lists.newArrayList(); | |
while (theBindings.hasNext()) { | |
BindingSet aBindingSet = theBindings.next(); | |
aVars.addAll(aBindingSet.getBindingNames()); | |
aBindingsList.add(aBindingSet); | |
} | |
return new TupleQueryResultImpl(Lists.newArrayList(aVars), aBindingsList); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Bulk loading data to new database sota. | |
Creating indexes...finished in 00:00:00.001 | |
Loaded 25 triples to sota from 1 file(s) in 00:00:00.009 @ 2.8K triples/sec. | |
Successfully created database 'sota'. | |
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+ | |
| issue | typeArc | stateValue | reportedByArc | reportedOnArc | reportedByArcCount | reproducedOnSequence | missingRelatedIssues | | |
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+ | |
| http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue7 | "passed" | "passed" | "passed" | "passed" | "expected 1, got 2" | "bad sequence" | "<http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue3> | | |
| | | | | | | | <http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue2>" | | |
| http://www.w3.org/2012/12/rdf-val/SOTA-ex#issue4 | "missing" | "invalid" | "missing" | "passed" | "expected 1, got 0" | "passed" | "passed" | | |
+--------------------------------------------------+-----------+------------+---------------+---------------+---------------------+----------------------+----------------------------------------------------------------------------------+ | |
Query returned 2 results in 00:00:00.012 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Validation report for the SOTA issue-tracking example
# (http://www.w3.org/2012/12/rdf-val/SOTA).
#
# Returns one row per candidate issue with one status column per check:
#   ?typeArc              "passed" if the issue is typed :Issue, else "missing"
#   ?stateValue           "passed" if :state is :unassigned or :assigned, else "invalid"
#                         (a missing :state is also reported as "invalid")
#   ?reportedByArc        "passed" if at least one :reportedBy exists, else "missing"
#   ?reportedOnArc        "passed" if :reportedOn exists, else "missing"
#   ?reportedByArcCount   "passed" if there is exactly one :reportedBy,
#                         else "expected 1, got N"
#   ?reproducedOnSequence "passed" if there is no :reproducedOn or it is strictly
#                         later than :reportedOn, else "bad sequence"
#   ?missingRelatedIssues concatenated <IRI> list of :related resources that never
#                         occur as a subject in the data, else "passed"
PREFIX : <http://www.w3.org/2012/12/rdf-val/SOTA-ex#>
# NOTE(review): this foaf IRI ends with an apostrophe ("0.1/'"), which is not the
# published FOAF namespace. The prefix is not used in this query; it matches the
# IRI declared in the example's Turtle files, so correct all of them together.
PREFIX foaf: <http://xmlns.com/foaf/0.1/'>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?issue
  (if(BOUND(?t), "passed", "missing") AS ?typeArc)
  (if(BOUND(?state) && (?state=:unassigned || ?state=:assigned),
      "passed", "invalid") AS ?stateValue)
  (if(BOUND(?reportedBy), "passed", "missing") AS ?reportedByArc)
  (if(BOUND(?reportedOn), "passed", "missing") AS ?reportedOnArc)
  (if(!BOUND(?reportedByCount), "expected 1, got 0",
      if(?reportedByCount=1, "passed",
         CONCAT("expected 1, got ", STR(?reportedByCount)))) AS ?reportedByArcCount)
  (if(!BOUND(?reproducedOn) || ?reproducedOn > ?reportedOn,
      "passed", "bad sequence") AS ?reproducedOnSequence)
  (if(BOUND(?missingRelatedIssuesStr), ?missingRelatedIssuesStr, "passed")
      AS ?missingRelatedIssues)
WHERE {
  # Get all viable :Issues by use of related predicates.
  # A resource counts as a candidate if it is typed :Issue or is the subject of
  # any issue property, so untyped issues are still reported.
  { SELECT DISTINCT ?issue WHERE {
      { ?issue a :Issue }
      UNION { ?issue :reportedBy|:reportedOn|:reproducedBy|:reproducedOn|:related ?rprt }
    }
  }

  # Test for a type arc and state.
  OPTIONAL { ?issue a ?t FILTER (?t = :Issue) }
  OPTIONAL { ?issue :state ?state }

  # Must have 1 reportedBy.
  # Issues without any :reportedBy get no row from this subquery, which leaves
  # ?reportedByCount unbound and is reported above as "expected 1, got 0".
  OPTIONAL { SELECT ?issue
                    (SAMPLE(?reportedBy1) AS ?reportedBy)
                    (COUNT(?reportedBy1) AS ?reportedByCount)
             WHERE {
               OPTIONAL { ?issue :reportedBy ?reportedBy1 }
             } GROUP BY ?issue
  }

  OPTIONAL { ?issue :reportedOn ?reportedOn }
  # :reproducedBy is not matched here: no output column reads it, and joining it
  # only duplicated rows that DISTINCT then removed. It is still used above to
  # discover candidate issues.
  OPTIONAL { ?issue :reproducedOn ?reproducedOn }

  # All :related issues must be known entities.
  OPTIONAL {
    SELECT ?issue
           (GROUP_CONCAT(CONCAT("<", STR(?referent), ">"))
              AS ?missingRelatedIssuesStr) {
      # List of missing issues related to ?issue.
      # A referent is "missing" when it has no triples with it as the subject,
      # i.e. the count of bound predicates for it is 0.
      SELECT ?issue ?referent
             (SUM(if(BOUND(?referentP), 1, 0)) AS ?referentCount)
      WHERE {
        ?issue :related ?referent
        OPTIONAL { ?referent ?referentP ?referentO }
      } GROUP BY ?issue ?referent
      HAVING (SUM(if(BOUND(?referentP), 1, 0)) = 0)
    } GROUP BY ?issue
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment