Skip to content

Querying datasets created by csv2rdf4lod

timrdf edited this page Mar 17, 2011 · 51 revisions

Other pages showing queries

What version of the converter was used?

Show a timeline of which versions of the converter were used, how many datasets were converted with each, and when each was last used (results):

# Timeline of converter usage: for each converter (and its DOAP revision, when
# one is recorded), report the number of versioned datasets whose dump file it
# produced and the latest dcterms:modified date among those datasets.
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX doap:       <http://usefulinc.com/ns/doap#>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX pmlp:       <http://inference-web.org/2.0/pml-provenance.owl#>
PREFIX pmlj:       <http://inference-web.org/2.0/pml-justification.owl#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

# Aggregates are wrapped in (expr AS ?var), as SPARQL 1.1 requires.
# COUNT(DISTINCT ...) counts datasets rather than joined rows, so a dataset
# with several dump files or justification nodes is counted once.
SELECT (MAX(?date) AS ?modified)
       ?converter
       ?converter_doap_revision
       (COUNT(DISTINCT ?dataset) AS ?count)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {

    ?dataset a conversion:VersionedDataset ;
             void:dataDump ?dumpFile .
    OPTIONAL { ?dataset dcterms:modified ?date }

    # The dump file is the conclusion of a justification node whose
    # inference step names the converter as its inference engine.
    ?ns pmlj:hasConclusion ?dumpFile ;
        pmlj:isConsequentOf [
          a pmlj:InferenceStep ;
          pmlj:hasInferenceEngine ?converter
        ] .
    OPTIONAL { ?converter doap:revision ?converter_doap_revision }
  }
}
GROUP BY ?converter ?converter_doap_revision
ORDER BY DESC(?modified) ?count

What datasets have owl:sameAs (by querying the enhancement parameters)

What datasets reference resources that are owl:sameAs other resources (results)?

# List each layer together with every enhancement in its conversion process
# that has an ov:csvCol, a conversion:label and a conversion:links_via value.
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX ov:         <http://open.vocab.org/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?layer ?col ?label ?lod_link
WHERE {
  GRAPH <http://purl.org/twc/vocab/conversion/ConversionProcess> {
    # Named variables stand in for the nested blank nodes; they are not
    # projected, so the result columns are unchanged.
    ?layer       conversion:conversion_process ?process .
    ?process     conversion:enhance            ?enhancement .
    ?enhancement ov:csvCol                     ?col ;
                 conversion:label              ?label ;
                 conversion:links_via          ?lod_link .
  }
}

Drafts

Is there a way to know which datasets are fully loaded in the sparql endpoint?

Was there any update on this question? Does running the SPARQL query below at http://logd.tw.rpi.edu/sparql return the complete list of loaded datasets?

# For every named graph whose IRI contains "data-gov", sum the
# conversion:num_triples values reported for the void:subset members of the
# resource that shares the graph's IRI.
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT ?g (SUM(?triples) AS ?estimated_triples)
WHERE {
  GRAPH ?g {
    ?g void:subset ?subdataset .
    ?subdataset conversion:num_triples ?triples .
    # regex() is defined on string literals; ?g is an IRI, so it is converted
    # with str() first. Without it, standard engines raise a type error and
    # the filter rejects every row.
    FILTER regex(str(?g), "data-gov")
  }
}
GROUP BY ?g

bad sources:

PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

# Count the distinct dcterms:source values of conversion:Dataset resources,
# ignoring sources whose IRI contains "provenance_file".
# Relies on the dcterms: and conversion: prefixes listed on the
# "bad sources" line that precedes this query.
SELECT (COUNT(DISTINCT ?organization) AS ?count)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             dcterms:source ?organization .
    # regex() is unanchored, so the surrounding ".*" was redundant.
    FILTER (!regex(str(?organization), "provenance_file"))
  }
}

Latest version of a dataset

results:

# List the subsets of data-gov dataset 92 with their dcterms:modified value
# (when present), sorted by that value in descending order.
PREFIX void:    <http://rdfs.org/ns/void#>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?subset ?modified
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    <http://logd.tw.rpi.edu/source/data-gov/dataset/92>
        void:subset ?subset .
    OPTIONAL {
      ?subset dcterms:modified ?modified
    }
  }
}
ORDER BY DESC(?modified)

latest dump file

Alvaro is using this query http://logd.tw.rpi.edu/query/logd-data-list-latest-dump-file-for-dataset.sql to obtain the latest dump for a dataset. However, dump files appear only for some datasets (see http://logd.tw.rpi.edu/datasets).

# List each dataset with the dump files of the layers of its versions,
# ordered by the layers' dcterms:created value, newest first. Datasets whose
# versions have no layer with both a dump file and a creation time are still
# returned, with ?dump_file unbound.
PREFIX foaf:       <http://xmlns.com/foaf/0.1/>
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?dataset ?dump_file
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             void:subset ?version .
    ?version a conversion:VersionedDataset .

    OPTIONAL {
      ?version void:subset ?layer .
      # The former second UNION branch repeated this pattern next to a
      # num_triples pattern that was not joined to ?layer. That produced a
      # Cartesian product and no additional distinct rows, so it is removed.
      ?layer void:dataDump   ?dump_file ;
             dcterms:created ?creationtime .
    }
  }
}
ORDER BY DESC(?creationtime)

Historical notes

(a few more sprinkled around)

Provenance queries

Trying to get to the param files (so we can count their triples to quantify the effort it took to create them).

Use case: find the parameters used during the conversion. (querying this is now difficult and needs to be eased)

# Find conclusions in RDFAbstractSyntax format that belong to a node
# referenced through rdf:first.
PREFIX rdf:        <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX pmlp:       <http://inference-web.org/2.0/pml-provenance.owl#>
PREFIX pmlj:       <http://inference-web.org/2.0/pml-justification.owl#>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT DISTINCT ?conclusion
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    # Blank-node syntax replaces the two intermediate variables, which were
    # never projected.
    [] rdf:first [ pmlj:hasConclusion ?conclusion ] .
    ?conclusion pmlp:hasFormat
        <http://inference-web.org/registry/FMT/RDFAbstractSyntax.owl#RDFAbstractSyntax> .
  }
}

Finding all of the sources:

# List every pmlp:Source that has no irw:redirectsTo statement.
PREFIX pmlp: <http://inference-web.org/2.0/pml-provenance.owl#>
PREFIX irw:  <http://www.ontologydesignpatterns.org/ont/web/irw.owl#>

SELECT ?url
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?url a pmlp:Source .
    # Negation by OPTIONAL + !BOUND: keep only sources for which the optional
    # redirect pattern found nothing.
    OPTIONAL { ?url irw:redirectsTo ?redirect_target }
    FILTER (!BOUND(?redirect_target))
  }
}

Finding datasets from their conversion parameters:

# Select resources that have a conversion process in the ConversionProcess
# graph and are also typed in the Dataset graph.
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>

SELECT ?dataset
WHERE {
  GRAPH <http://purl.org/twc/vocab/conversion/ConversionProcess> {
    ?dataset conversion:conversion_process ?any_process .
  }
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a ?any_type .
  }
  # Disabled alternative: also require a type in the MetaDataset graph.
  #GRAPH <http://purl.org/twc/vocab/conversion/MetaDataset> {
  #  ?dataset a []
  #}
}

Datasets that have been converted, but do not have the "minimal metadata"

Modifying LOGD's dataset listing query with negation (er.. OPTIONAL{}+!BOUND()..) shows datasets that have been converted but do not have metadata (results):

# Datasets that have been converted (they have a version with a layer that
# has a dump file and a triple count) but have no title/description/agency
# metadata in the MetaDataset graph. The metadata columns are kept in the
# projection so the result shape matches the dataset listing query; the
# final FILTER leaves them unbound in every returned row.
PREFIX rdfs:       <http://www.w3.org/2000/01/rdf-schema#>
PREFIX void:       <http://rdfs.org/ns/void#>
PREFIX foaf:       <http://xmlns.com/foaf/0.1/>
PREFIX dcterms:    <http://purl.org/dc/terms/>
PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
PREFIX catalog:    <http://logd.tw.rpi.edu/source/twc-rpi-edu/dataset/dataset-catalog/vocab/enhancement/1/>
PREFIX ds92:       <http://logd.tw.rpi.edu/source/data-gov/dataset/92/vocab/enhancement/1/>

SELECT ?dataset ?Dataset_Identifier ?Page ?Title ?Agency
       ?Description ?homepage (SUM(?triples) AS ?Number_of_Triples)
WHERE {
  GRAPH <http://logd.tw.rpi.edu/vocab/Dataset> {
    ?dataset a conversion:Dataset ;
             foaf:isPrimaryTopicOf ?Page ;
             dcterms:identifier    ?Dataset_Identifier ;
             void:subset           ?version .
    ?version a conversion:VersionedDataset ;
             void:subset ?layer .
    {
      # The triple count is attached directly to the layer...
      ?layer conversion:num_triples ?triples .
      ?layer void:dataDump ?dump_file .
    } UNION {
      # ...or to a subset of the layer.
      ?layer void:subset ?discriminator .
      ?discriminator conversion:num_triples ?triples .
      ?layer void:dataDump ?dump_file .
    }
  }

  OPTIONAL {
    GRAPH <http://purl.org/twc/vocab/conversion/MetaDataset> {
      ?dataset dcterms:title ?Title .
      ?dataset dcterms:description ?Description .
      ?dataset catalog:source_agency [ rdfs:label ?Agency ]
      OPTIONAL {
        ?dataset foaf:homepage ?homepage .
      }
    }
  }
  # Negation by OPTIONAL + !BOUND: keep only datasets for which the metadata
  # block above did not match.
  FILTER (!BOUND(?Title))
}
# SPARQL 1.1 requires every non-aggregated projected variable to be grouped.
# Grouping also makes rows unique, so DISTINCT is no longer needed.
GROUP BY ?dataset ?Dataset_Identifier ?Page ?Title ?Agency ?Description ?homepage
ORDER BY ?Dataset_Identifier

Clone this wiki locally