Skip to content

Commit e56c76f

Browse files
Merge pull request #185 from rdfhdt/fix-stringencoding
FIXED: Duplicate triples with plain literals
2 parents b90d8a3 + 2bb9813 commit e56c76f

4 files changed

Lines changed: 463 additions & 33 deletions

File tree

libhdt/include/SingleTriple.hpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,9 @@ namespace hdt {
4545
#define IS_VARIABLE(a) ( (a).size()>0 && (a).at(0)=='?')
4646
#define IS_URI(a) ( (a).size()>0 && (a).at(0)!='<' && (a).at(0)!='_')
4747
#define IS_LITERAL(a) ( (a).size()>0 && (a).at(0)=='"')
48+
#define IS_SIMPLELITERAL(a) ( (a).size()>0 && (a).at(0)=='"' && (a).at((a).size() - 1)=='"')
4849

50+
const std::string postfix = "^^<http://www.w3.org/2001/XMLSchema#string>";
4951
/**
5052
* Represents a single triple, where the subject, predicate, and object components are
5153
* represented using integer IDs after applying the dictionary conversion.
@@ -316,20 +318,20 @@ class TripleString
316318
TripleString(std::string subject, std::string predicate, std::string object) {
317319
this->subject = subject;
318320
this->predicate = predicate;
319-
this->object = object;
321+
this->setObject(object);
320322
}
321323

322324
TripleString(const TripleString &other) {
323325
this->subject = other.subject;
324326
this->predicate = other.predicate;
325-
this->object = other.object;
327+
this->setObject(other.object);
326328
}
327329

328330
TripleString & operator=(const TripleString &other) {
329331
if(this!=&other) {
330332
this->subject = other.subject;
331333
this->predicate = other.predicate;
332-
this->object = other.object;
334+
this->setObject(other.object);
333335
}
334336
return *this;
335337
}
@@ -360,7 +362,7 @@ class TripleString
360362
inline void setAll(const std::string &subject, const std::string &predicate, const std::string &object) {
361363
this->subject = subject;
362364
this->predicate = predicate;
363-
this->object = object;
365+
this->setObject(object);
364366
}
365367

366368
/**
@@ -393,6 +395,14 @@ class TripleString
393395
*/
394396
void setObject(const std::string &object) {
395397
this->object = object;
398+
399+
// Normalize xsd:strings to simple literals
400+
size_t oSize = object.length();
401+
size_t pfSize = postfix.length();
402+
403+
if (oSize > 0 && oSize >= pfSize && object.compare(oSize - pfSize, pfSize, postfix) == 0) {
404+
this->object.erase(oSize - pfSize);
405+
}
396406
}
397407

398408
/**
@@ -489,7 +499,7 @@ class TripleString
489499
pos_a = pos_b + 1;
490500

491501
// Reads the object
492-
object = line.substr(pos_a);
502+
setObject(line.substr(pos_a));
493503
//if(object[0]=='?') object = "";
494504
pos_a = pos_b;
495505
}

0 commit comments

Comments
 (0)