Skip to content

Commit 42da70f

Browse files
committed
ENHANCED: Improved support for RDF import/output formats.
The following improvements are made: - HDT files can now be created based on TriG input. - HDT files can now be exported in the TriG format. - Command-line tools now correctly advertise all and only supported formats. Implementation details: - `RDFNotation::TRIG' was added. - `RDFNotation::NQUADS' was added. The old `RDFNotation::NQUAD' is left in place, and is marked deprecated. The support for RDF serialization formats contained many bugs: - It was not possible to generate/export HDTs from/to TriG files, even though Serd parses/generates TriG. - Serialization formats `n3' and `rdfxml' were still advertised in command-line tools, even though using these results in an error: $ ./rdf2hdt -f n3 example.ttl example.hdt Catch exception load: Item not found ERROR: Item not found - The Windows-based command-line utility (`rdf2hdt/rdf2hdt.cpp') did not support the abbreviated format names (`nq', `nt', and `ttl'). For consistency, the Git ignore file is updated to exclude N-Quads, TriG, and Turtle files in addition to N-Triples files. Backwards compatibility is ensured by leaving all old `RDFNotation' items in place (with the same integer value). This includes formats that are not supported, i.e., `N3', `JSON', and `XML'.
1 parent f316f9c commit 42da70f

7 files changed

Lines changed: 56 additions & 61 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ stamp-h1
5454

5555
*.hdt
5656
*.hdt.index
57+
*.nq
5758
*.nt
5859
*.rdf
60+
*.trig
61+
*.ttl
5962
*.a
6063
**/examples/*
6164
**/tests/*

libhdt/include/HDTEnums.hpp

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -108,37 +108,14 @@ TripleComponentOrder parseOrder(const char *str);
108108
const char *getOrderStr(TripleComponentOrder order);
109109

110110
enum RDFNotation {
111-
/**
112-
* XML notation
113-
*
114-
* @see <a href="http://en.wikipedia.org/wiki/RDF/XML">Wikipedia</a>
115-
*/
116-
XML,
117-
/**
118-
* N-TRIPLES notation
119-
*
120-
* @see <a href="http://en.wikipedia.org/wiki/N-Triples">Wikipedia</a>
121-
*/
111+
XML, // No longer supported.
122112
NTRIPLES,
123-
/**
124-
* TURTLE notation
125-
*
126-
* @see <a href="http://en.wikipedia.org/wiki/Turtle_(syntax)">Wikipedia</a>
127-
*/
128113
TURTLE,
129-
/**
130-
* Notation 3 notation
131-
*
132-
* @see <a href="http://en.wikipedia.org/wiki/Notation_3">Wikipedia</a>
133-
*/
134-
N3,
135-
/**
136-
* NQUAD notation
137-
*
138-
* @see <a href="http://sw.deri.org/2008/07/n-quads/">Wikipedia</a>
139-
*/
140-
NQUAD,
141-
JSON,
114+
N3, // Not supported.
115+
NQUAD, // Deprecated: use `NQUADS' instead.
116+
JSON, // Not supported.
117+
NQUADS,
118+
TRIG,
142119
};
143120

144121
enum ResultEstimationType {

libhdt/src/rdf/RDFParser.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@ namespace hdt {
1414

1515
RDFParserCallback *RDFParserCallback::getParserCallback(RDFNotation notation) {
1616
#ifdef HAVE_SERD
17-
if(notation==TURTLE || notation==NTRIPLES || notation==NQUAD) {
17+
if(notation==NQUAD || // Deprecated: use `NQUADS' instead.
18+
notation==NQUADS ||
19+
notation==NTRIPLES ||
20+
notation==TRIG ||
21+
notation==TURTLE) {
1822
return new RDFParserSerd();
1923
}
2024
#else

libhdt/src/rdf/RDFParserSerd.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,18 @@ RDFParserSerd::~RDFParserSerd() {
152152

153153
SerdSyntax RDFParserSerd::getParserType(RDFNotation notation) {
154154
switch(notation){
155+
case NQUAD: // Deprecated: use `NQUADS' instead.
156+
return SERD_NQUADS;
157+
case NQUADS:
158+
return SERD_NQUADS;
155159
case NTRIPLES:
156160
return SERD_NTRIPLES;
157-
case NQUAD:
158-
return SERD_NQUADS;
161+
case TRIG:
162+
return SERD_TRIG;
159163
case TURTLE:
160164
return SERD_TURTLE;
161165
default:
162-
throw ParseException("Serd parser only supports ntriples, nquads, and turtle.");
166+
throw ParseException("Serd parser only supports N-Triples, N-Quads, TriG, and Turtle.");
163167
}
164168
}
165169

libhdt/src/rdf/RDFSerializerSerd.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,18 @@ namespace hdt {
1111

1212
static SerdSyntax getType(RDFNotation notation) {
1313
switch (notation) {
14+
case NQUAD: // Deprecated: use `NQUADS' instead.
15+
return SERD_NQUADS;
16+
case NQUADS:
17+
return SERD_NQUADS;
1418
case NTRIPLES:
1519
return SERD_NTRIPLES;
16-
case NQUAD:
17-
return SERD_NQUADS;
20+
case TRIG:
21+
return SERD_TRIG;
1822
case TURTLE:
1923
return SERD_TURTLE;
2024
default:
21-
throw std::runtime_error("Serd seriaizer only supports ntriples, nquads, and turtle.");
25+
throw std::runtime_error("Serd seriaizer only supports N-Quads, N-Triples, TriG, and Turtle.");
2226
}
2327
}
2428

libhdt/tools/rdf2hdt.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void help() {
5151
cout << "\t-i\t\t\tAlso generate index to solve all triple patterns." << endl;
5252
cout << "\t-c\t<configfile>\tHDT Config options file" << endl;
5353
cout << "\t-o\t<options>\tHDT Additional options (option1=value1;option2=value2;...)" << endl;
54-
cout << "\t-f\t<format>\tFormat of the RDF input (n3, ntriples or nt, nquads or nq, turtle or ttl)" << endl;
54+
cout << "\t-f\t<format>\tFormat of the RDF input (nquads,nq,ntriples,nt,trig,turtle,ttl)" << endl;
5555
cout << "\t-B\t\"<base URI>\"\tBase URI of the dataset." << endl;
5656
cout << "\t-V\tPrints the HDT version number." << endl;
5757
cout << "\t-p\tPrints a progress indicator." << endl;
@@ -173,7 +173,7 @@ int main(int argc, char **argv) {
173173
if (rdfFormat == "" || rdfFormat == "gz")
174174
{
175175
rdfFormat = "nt";
176-
vout << "No input format detected. Using default: NTRIPLES." << endl;
176+
vout << "No input format detected: using N-Triples by default." << endl;
177177
}
178178
}
179179

@@ -183,22 +183,22 @@ int main(int argc, char **argv) {
183183
transform (rdfFormat.begin (), rdfFormat.end (), rdfFormat.begin (), ::tolower);
184184

185185
// Detect input format
186-
if (rdfFormat == "n3")
187-
notation = N3;
188-
else if (rdfFormat == "ntriples" || rdfFormat == "nt")
186+
if (rdfFormat=="nquads" || rdfFormat=="nq") {
187+
notation = NQUADS;
188+
} else if (rdfFormat== "ntriples" || rdfFormat=="nt") {
189189
notation = NTRIPLES;
190-
else if (rdfFormat == "nquads" || rdfFormat == "nq")
191-
notation = NQUAD;
192-
else if (rdfFormat == "turtle" || rdfFormat == "ttl")
190+
} else if (rdfFormat=="trig") {
191+
notation = TRIG;
192+
} else if (rdfFormat=="turtle" || rdfFormat=="ttl") {
193193
notation = TURTLE;
194194
// -f or file extension detected, but didn't match any valid format.
195-
else {
196-
cerr << "ERROR: Detected \"" << rdfFormat << "\" input format. Must be one of:" << endl
197-
<< "\t- n3" << endl
198-
<< "\t- ntriples or nt" << endl
199-
<< "\t- nquads or nq" << endl
200-
<< "\t- turtle or ttl" << endl;
201-
195+
} else {
196+
cerr << "ERROR: Input format `" << rdfFormat << "' is not supported.\n"
197+
<< "Use either of the following:\n"
198+
<< "\t- `ntriples' or `nt' for N-Triples\n"
199+
<< "\t- `nquads' or `nq' for N-Quads\n"
200+
<< "\t- `turtle' or `ttl' for Turtle\n"
201+
<< "\t- `trig' for TriG" << endl;
202202
return 1;
203203
}
204204

rdf2hdt/rdf2hdt.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void help() {
5151
cout << "\t-i\t\tAlso generate index to solve all triple patterns." << endl;
5252
cout << "\t-c\t<configfile>\tHDT Config options file" << endl;
5353
cout << "\t-o\t<options>\tHDT Additional options (option1=value1;option2=value2;...)" << endl;
54-
cout << "\t-f\t<format>\tFormat of the RDF input (ntriples, nquad, n3, turtle, rdfxml)" << endl;
54+
cout << "\t-f\t<format>\tFormat of the RDF input (nquads,nq,ntriples,nt,trig,turtle,ttl)" << endl;
5555
cout << "\t-B\t\"<base URI>\"\tBase URI of the dataset." << endl;
5656
cout << "\t-V\tPrints the HDT version number." << endl;
5757
//cout << "\t-v\tVerbose output" << endl;
@@ -129,18 +129,21 @@ int main(int argc, char **argv) {
129129
}
130130

131131
if(rdfFormat!="") {
132-
if(rdfFormat=="ntriples") {
132+
if(rdfFormat=="nquads" || rdfFormat=="nq") {
133+
notation = NQUADS;
134+
} else if(rdfFormat=="ntriples" || rdfFormat=="nt") {
133135
notation = NTRIPLES;
134-
} else if(rdfFormat=="nquad") {
135-
notation = NQUAD;
136-
} else if(rdfFormat=="n3") {
137-
notation = N3;
138-
} else if(rdfFormat=="turtle") {
136+
} else if(rdfFormat=="trig") {
137+
notation = TRIG;
138+
} else if(rdfFormat=="turtle" || rdfFormat=="ttl") {
139139
notation = TURTLE;
140-
} else if(rdfFormat=="rdfxml") {
141-
notation = XML;
142140
} else {
143-
cout << "ERROR: The RDF input format must be one of: (ntriples, nquad, n3, turtle, rdfxml)" << endl;
141+
cerr << "ERROR: Input format `" << rdfFormat << "' is not supported.\n"
142+
<< "Use either of the following:\n"
143+
<< "\t- `ntriples' or `nt' for N-Triples\n"
144+
<< "\t- `nquads' or `nq' for N-Quads\n"
145+
<< "\t- `turtle' or `ttl' for Turtle\n"
146+
<< "\t- `trig' for TriG" << endl;
144147
help();
145148
return 1;
146149
}

0 commit comments

Comments
 (0)