From e3cd70034eec56468101db59f2f8c2f12b13f2b5 Mon Sep 17 00:00:00 2001 From: Gregory Todd Williams Date: Thu, 4 Aug 2011 15:12:37 -0400 Subject: [PATCH 1/3] Added option to replace graph data using vload. --- bin/util/virtuoso/vload | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/bin/util/virtuoso/vload b/bin/util/virtuoso/vload index f23da326..7f8ea09e 100755 --- a/bin/util/virtuoso/vload +++ b/bin/util/virtuoso/vload @@ -21,12 +21,21 @@ fi # # Check usage if [ $# -ne 3 ]; then - echo "usage: `basename $0` {rdf, ttl, nt, nq} " + echo "usage: `basename $0` {rdf, ttl, nt, nq} [--replace] " exit 1 fi input_format="$1" data_file="$2" + +replace="no" +operation="Loading" +if [ $3 == "--replace" ]; then + replace="yes" + operation="Replacing" + shift +fi + graph_uri="$3" # Check if file exists @@ -35,7 +44,7 @@ if [ ! -e $data_file ]; then exit 1 fi -echo "Loading triples into graph <$graph_uri> ..." +echo "${operation} triples into graph <$graph_uri> ..." # Check to see if file is within a directory Virtuoso can read from directly. load_in_place="false" @@ -99,7 +108,13 @@ esac vload_log=$virtuoso_allowed_dir/load_`${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh coin:slug`_$$.log +sparql_delete="" +if [ $replace == "yes" ]; then + sparql_delete="SPARQL CLEAR GRAPH <$graph_uri>;" +fi + ${isql_cmd} ${isql_pwd} << EOF &> $vload_log + $sparql_delete $load_func checkpoint; exit; @@ -119,8 +134,8 @@ fi # TODO: cat file if it contains /^... Error/ if [ ${CSV2RDF4LOD_CONVERT_DEBUG_LEVEL:-"."} == "finest" ]; then - echo "Loading finished! $vload_log:" + echo "${operation} finished! $vload_log:" cat $vload_log else - echo "Loading finished! Check $virtuoso_allowed_dir/latest.log for details." + echo "${operation} finished! Check $virtuoso_allowed_dir/latest.log for details." fi From 3bfcf8ef9b98d964269495602a2ec7eb53e0b54c Mon Sep 17 00:00:00 2001 From: Gregory Todd Williams Date: Fri, 26 Aug 2011 20:46:52 -0400 Subject: [PATCH 2/3] Updated DB.DBA.TTLP_MT call to add concurrency arguments. --- bin/util/virtuoso/vload | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/util/virtuoso/vload b/bin/util/virtuoso/vload index 7f8ea09e..3898b785 100755 --- a/bin/util/virtuoso/vload +++ b/bin/util/virtuoso/vload @@ -79,19 +79,19 @@ isql_pwd="${CSV2RDF4LOD_PUBLISH_VIRTUOSO_PASSWORD:-"dba"}" # default admin pass load_func="" case "$input_format" in "rdf") - load_func="DB.DBA.RDF_LOAD_RDFXML_MT(file_to_string_output('$file_name_full'), '', '$graph_uri');" + load_func="DB.DBA.RDF_LOAD_RDFXML_MT(file_to_string_output('$file_name_full'), '', '$graph_uri', 2, $CSV2RDF4LOD_CONCURRENCY);" # echo $load_func ;; "ttl") - load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 255);" + load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 255, 2, $CSV2RDF4LOD_CONCURRENCY);" # echo $load_func ;; "nt") - load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 255);" + load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 255, 2, $CSV2RDF4LOD_CONCURRENCY);" # echo $load_func ;; "nq") - load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 512);" + load_func="DB.DBA.TTLP_MT(file_to_string_output('$file_name_full'),'','$graph_uri', 512, 2, $CSV2RDF4LOD_CONCURRENCY);" # echo $load_func ;; *) From 862490e740e0c1a38e24eb7089ecc9a3dba0cbc2 Mon Sep 17 00:00:00 2001 From: Gregory Todd Williams Date: Mon, 19 Sep 2011 14:42:10 -0400 Subject: [PATCH 3/3] Updated vload and vdelete to use row-autocommit mode to reduce memory load on large graphs. Removed explicit checkpoint from vload and vdelete. Added the isql commands executed to the vload/vdelete log output. --- bin/util/virtuoso/vdelete | 4 +++- bin/util/virtuoso/vload | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bin/util/virtuoso/vdelete b/bin/util/virtuoso/vdelete index ff38c20e..771d9ddb 100755 --- a/bin/util/virtuoso/vdelete +++ b/bin/util/virtuoso/vdelete @@ -37,13 +37,15 @@ vdelete_log=$log_dir/delete_`${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh c start=`${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh` sparql_delete="SPARQL CLEAR GRAPH <$graph_uri>;" ${isql_cmd} ${isql_pass} << EOF &> $vdelete_log + log_enable(3,1); $sparql_delete - checkpoint; exit; EOF echo "" >> $vdelete_log echo "# start time: $start" >> $vdelete_log echo "# user: `whoami`" >> $vdelete_log +echo "# log_enable(3,1);" >> $vdelete_log +echo "# $sparql_delete" >> $vdelete_log echo "# end time: `${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh`" >> $vdelete_log # Move the latest.log pointer to the log we just created. diff --git a/bin/util/virtuoso/vload b/bin/util/virtuoso/vload index 09f095b4..8569949a 100755 --- a/bin/util/virtuoso/vload +++ b/bin/util/virtuoso/vload @@ -125,14 +125,17 @@ fi start=`${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh` ${isql_cmd} ${isql_pwd} << EOF &> $vload_log + log_enable(3,1); $sparql_delete $load_func - checkpoint; exit; EOF echo "" >> $vload_log echo "# start time: $start" >> $vload_log echo "# user: `whoami`" >> $vload_log +echo "# log_enable(3,1);" >> $vload_log +echo "# $sparql_delete" >> $vload_log +echo "# $load_func" >> $vload_log echo "# end time: `${CSV2RDF4LOD_HOME}/bin/util/dateInXSDDateTime.sh`" >> $vload_log # Move the latest.log pointer to the log we just created.