-
Notifications
You must be signed in to change notification settings - Fork 35
Querying datasets created by csv2rdf4lod
- Namespace prefix handling
- Dataset composition resulting from naming by source, dataset, and version
- Aggregating subsets of converted datasets
- Querying HTTP redirect provenance to find new dataset versions
- Provenance
Show a time line of which version of the converter was used, how many datasets were converted with it, when it was last used (results):
# Per converter (and DOAP revision, when one is recorded): the latest
# dcterms:modified value among the matching versioned datasets and the number
# of matching dataset rows.
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX doap:       <http://usefulinc.com/ns/doap#>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX pmlp:       <http://inference-web.org/2.0/pml-provenance.owl#>
PREFIX pmlj:       <http://inference-web.org/2.0/pml-justification.owl#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

# Aggregate projections are parenthesised as SPARQL 1.1 requires.
SELECT DISTINCT (MAX(?date) AS ?modified)
                ?converter
                ?converter_doap_revision
                (COUNT(?dataset) AS ?count)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:VersionedDataset ;
             void:dataDump ?dumpFile .
    OPTIONAL { ?dataset dcterms:modified ?date }

    # Link the dump file to the engine recorded as having produced it.
    ?ns pmlj:hasConclusion ?dumpFile ;
        pmlj:isConsequentOf [
          a pmlj:InferenceStep ;
          pmlj:hasInferenceEngine ?converter
        ] .
    OPTIONAL { ?converter doap:revision ?converter_doap_revision }
  }
}
GROUP BY ?converter ?converter_doap_revision
ORDER BY DESC(?modified) ?count
What datasets reference resources that are owl:sameAs other resources (results)?
# For every ?layer that has a conversion process, list each enhancement that
# declares a CSV column, a label and a links_via target.
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX ov:         <http://open.vocab.org/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?layer ?col ?label ?lod_link
WHERE {
  GRAPH <http://purl.org/twc/vocab/conversion/ConversionProcess> {
    # The intermediate nodes are matched with helper variables instead of
    # nested blank-node syntax; they are not projected.
    ?layer       conversion:conversion_process ?process .
    ?process     conversion:enhance            ?enhancement .
    ?enhancement ov:csvCol            ?col ;
                 conversion:label     ?label ;
                 conversion:links_via ?lod_link .
  }
}
Is there a way to know which datasets are fully loaded in the sparql endpoint?
Was there any update on this question? Does running the SPARQL query below at http://logd.tw.rpi.edu/sparql return the complete list of loaded datasets?
# Sum the conversion:num_triples values of the subsets described inside each
# named graph whose IRI contains "data-gov".
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT ?g (SUM(?triples) AS ?estimated_triples)
WHERE {
  GRAPH ?g {
    ?g void:subset ?subdataset .
    ?subdataset conversion:num_triples ?triples .
    # regex() is defined over string literals, so the graph IRI is converted
    # with str() before matching.
    FILTER regex(str(?g), "data-gov")
  }
}
GROUP BY ?g
bad sources:
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
# Count the distinct dcterms:source values of conversion:Dataset resources.
# The aggregate projection is parenthesised as SPARQL 1.1 requires.
SELECT (COUNT(DISTINCT ?organization) AS ?count)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             dcterms:source ?organization .
    # Leave out sources whose IRI contains "provenance_file".
    FILTER(!regex(str(?organization), ".*provenance_file.*"))
  }
}
# Subsets of data-gov dataset 92 together with their dcterms:modified value
# (when present), ordered by that value, descending.
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX void:    <http://rdfs.org/ns/void#>

SELECT ?subset ?modified
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    <http://logd.tw.rpi.edu/source/data-gov/dataset/92>
        void:subset ?subset .
    OPTIONAL {
      ?subset dcterms:modified ?modified .
    }
  }
}
ORDER BY DESC(?modified)
Alvaro is using this query http://logd.tw.rpi.edu/query/logd-data-list-latest-dump-file-for-dataset.sql to obtain the latest dump for a dataset. However they appear only for some datasets (see http://logd.tw.rpi.edu/datasets)
# List each conversion:Dataset with the dump files of the layers of its
# versioned subsets (when any), ordered by the layer's dcterms:created value,
# descending.
PREFIX foaf:       <http://xmlns.com/foaf/0.1/>
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?dataset ?dump_file
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             void:subset ?version .
    ?version a conversion:VersionedDataset .
    OPTIONAL {
      ?version void:subset ?layer .
      {
        {
          ?layer void:dataDump ?dump_file ;
                 dcterms:created ?creationtime .
        }
        UNION
        {
          # NOTE(review): ?descriminator is not joined to ?layer in this
          # branch, so it matches any resource that has conversion:num_triples.
          # Possibly "?layer void:subset ?descriminator" was intended - confirm.
          ?descriminator conversion:num_triples ?triples .
          ?layer void:dataDump ?dump_file ;
                 dcterms:created ?creationtime .
        }
      }
    }
  }
}
ORDER BY DESC(?creationtime)
- http://tw.rpi.edu/wiki/tw:17_Sept_2010_SWC_LOG_notes
- http://logd.tw.rpi.edu/tutorial/exploring_logd_metadata_with_sparql
- http://logd.tw.rpi.edu/tutorial/How_to_find_datasets_using_the_LOGD_sparql_endpoint
- http://code.google.com/p/data-gov-wiki/issues/detail?id=35
(a few more sprinkled around)
Trying to get to the param files (so we can count their triples to quantify the effort needed to create them).
Use case: find the parameters used during the conversion. (querying this is now difficult and needs to be eased)
# Distinct conclusions that are in the RDFAbstractSyntax format and are
# concluded by something appearing as the rdf:first member of a list node.
PREFIX rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX pmlp:       <http://inference-web.org/2.0/pml-provenance.owl#>
PREFIX pmlj:       <http://inference-web.org/2.0/pml-justification.owl#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?conclusion
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?conclusion pmlp:hasFormat
        <http://inference-web.org/registry/FMT/RDFAbstractSyntax.owl#RDFAbstractSyntax> .
    ?node_set   pmlj:hasConclusion ?conclusion .
    ?list_node  rdf:first          ?node_set .
  }
}
Finding all of the sources:
# Every pmlp:Source that has no irw:redirectsTo statement.
PREFIX irw:  <http://www.ontologydesignpatterns.org/ont/web/irw.owl#>
PREFIX pmlp: <http://inference-web.org/2.0/pml-provenance.owl#>

SELECT ?url
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?url a pmlp:Source .
    # Negation via OPTIONAL + !bound: keep only the rows where no redirect
    # target could be matched.
    OPTIONAL {
      ?url irw:redirectsTo ?redirect_target .
    }
    FILTER( !bound(?redirect_target) )
  }
}
Finding datasets from their conversion parameters:
# Datasets that have a conversion process in the ConversionProcess graph and
# carry at least one rdf:type statement in the Dataset graph.
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT ?dataset
WHERE {
  GRAPH <http://purl.org/twc/vocab/conversion/ConversionProcess> {
    ?dataset conversion:conversion_process [] .
  }
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a [] .
  }
  #GRAPH <http://purl.org/twc/vocab/conversion/MetaDataset> {
  #   ?dataset a []
  #}
}
Modifying LOGD's dataset listing query with negation (er.. OPTIONAL{}+!BOUND()..) shows datasets that have been converted but do not have metadata (results):
# Converted datasets for which the MetaDataset graph supplies no complete
# title/description/agency record, with the summed triple counts of their
# layers.
PREFIX rdfs:       <http://www.w3.org/2000/01/rdf-schema#>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX foaf:       <http://xmlns.com/foaf/0.1/>
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
PREFIX catalog:    <http://logd.tw.rpi.edu/source/twc-rpi-edu/dataset/dataset-catalog/vocab/enhancement/1/>
PREFIX ds92:       <http://logd.tw.rpi.edu/source/data-gov/dataset/92/vocab/enhancement/1/>

# The aggregate projection is parenthesised and every non-aggregated variable
# is listed in GROUP BY, as SPARQL 1.1 requires.
SELECT DISTINCT ?dataset ?Dataset_Identifier ?Page ?Title ?Agency
       ?Description ?homepage (SUM(?triples) AS ?Number_of_Triples)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             foaf:isPrimaryTopicOf ?Page ;
             dcterms:identifier    ?Dataset_Identifier ;
             void:subset           ?version .
    ?version a conversion:VersionedDataset .
    ?version void:subset ?layer .
    # Triple counts are taken either from the layer itself or from a subset
    # of the layer.
    {
      ?layer conversion:num_triples ?triples .
      ?layer void:dataDump ?dump_file .
    } UNION {
      ?layer void:subset ?descriminator .
      ?descriminator conversion:num_triples ?triples .
      ?layer void:dataDump ?dump_file .
    }
  }
  OPTIONAL {
    GRAPH <http://purl.org/twc/vocab/conversion/MetaDataset> {
      ?dataset dcterms:title ?Title .
      ?dataset dcterms:description ?Description .
      ?dataset catalog:source_agency [ rdfs:label ?Agency ] .
      OPTIONAL {
        ?dataset foaf:homepage ?homepage .
      }
    }
  }
  # Negation via OPTIONAL + !BOUND: keep only the datasets for which the
  # metadata block above did not match.
  FILTER(!BOUND(?Title))
}
GROUP BY ?dataset ?Dataset_Identifier ?Page ?Title ?Agency ?Description ?homepage
ORDER BY ?Dataset_Identifier