Skip to content

Commit b0df3db

Browse files
committed
Add w3c/rdf-tests as a submodule; fix the conversion failures found.
1 parent 2eb49f8 commit b0df3db

9 files changed

Lines changed: 114 additions & 12 deletions

File tree

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "tests/resources/rdf-tests"]
2+
path = tests/resources/rdf-tests
3+
url = https://github.com/w3c/rdf-tests.git

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ name = "benchmark"
1515
harness = false
1616

1717
[dependencies]
18-
anyhow = "1.0.98"
19-
chrono = "0.4.40"
18+
chrono = "0.4"
2019
clap = { version = "4.5", features = ["derive","cargo"] }
2120
clap-verbosity-flag = "3.0"
2221
crc = "3.2.1"
@@ -30,6 +29,7 @@ tempfile = "3.19"
3029
[dev-dependencies]
3130
criterion = { version = "0.5", features = ["async_tokio"]}
3231
pprof = { version = "0.14", features = ["protobuf", "protobuf-codec", "criterion"] }
32+
walkdir = "2.5"
3333

3434
[package.metadata.cargo-machete]
35-
ignored = ["iref", "langtag", "hdt"]
35+
ignored = ["iref", "langtag"]

src/bitmap_triples.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,11 @@ impl BitmapTriplesBuilder {
121121
// bitmapZ->save(output);
122122
self.save_bitmap(&self.bitmap_z, dest_writer)?;
123123

124-
let num_bits = self.num_triples.ilog2() + 1;
124+
let num_bits = if self.num_triples == 0 {
125+
0
126+
} else {
127+
self.num_triples.ilog2() + 1
128+
};
125129
if num_bits > u8::MAX as u32 {
126130
panic!("bits_per_entry too large")
127131
}

src/builder.rs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ mod tests {
282282
io::{BufReader, Read},
283283
path::Path,
284284
};
285+
use tempfile::tempdir;
286+
use walkdir::WalkDir;
285287

286288
#[test]
287289
fn test_build_hdt() {
@@ -343,4 +345,90 @@ mod tests {
343345
.collect::<Vec<_>>();
344346
assert_eq!(triple_vec, res)
345347
}
348+
349+
#[test]
350+
fn test_sparql10() {
351+
let tmp_dir = tempdir().expect("failed to create temp dir");
352+
let input_files = find_ttl_files("tests/resources/rdf-tests/sparql/sparql10");
353+
for f in &input_files {
354+
let new_hdt = format!(
355+
"{}/{}",
356+
tmp_dir.path().to_str().unwrap(),
357+
Path::new(f)
358+
.file_name()
359+
.unwrap()
360+
.to_str()
361+
.unwrap()
362+
.replace(".ttl", ".hdt")
363+
);
364+
365+
let res = build_hdt(vec![f.clone()], new_hdt.as_str(), Options::default());
366+
assert!(res.is_ok());
367+
}
368+
}
369+
370+
#[test]
371+
fn test_sparql11() {
372+
let tmp_dir = tempdir().expect("failed to create temp dir");
373+
let input_files = find_ttl_files("tests/resources/rdf-tests/sparql/sparql11");
374+
for f in &input_files {
375+
let new_hdt = format!(
376+
"{}/{}",
377+
tmp_dir.path().to_str().unwrap(),
378+
Path::new(f)
379+
.file_name()
380+
.unwrap()
381+
.to_str()
382+
.unwrap()
383+
.replace(".ttl", ".hdt")
384+
);
385+
386+
let res = build_hdt(vec![f.clone()], new_hdt.as_str(), Options::default());
387+
match &res {
388+
Ok(_) => {}
389+
Err(e) => {
390+
eprintln!("{f} failed to convert: {e}")
391+
}
392+
}
393+
394+
assert!(res.is_ok());
395+
}
396+
}
397+
398+
#[test]
399+
fn test_sparql12() {
400+
let tmp_dir = tempdir().expect("failed to create temp dir");
401+
let input_files = find_ttl_files("tests/resources/rdf-tests/sparql/sparql12");
402+
for f in &input_files {
403+
let new_hdt = format!(
404+
"{}/{}",
405+
tmp_dir.path().to_str().unwrap(),
406+
Path::new(f)
407+
.file_name()
408+
.unwrap()
409+
.to_str()
410+
.unwrap()
411+
.replace(".ttl", ".hdt")
412+
);
413+
414+
let res = build_hdt(vec![f.clone()], new_hdt.as_str(), Options::default());
415+
match &res {
416+
Ok(_) => {}
417+
Err(e) => {
418+
eprintln!("{f} failed to convert: {e}")
419+
}
420+
}
421+
422+
assert!(res.is_ok());
423+
}
424+
}
425+
426+
fn find_ttl_files<P: AsRef<Path>>(dir: P) -> Vec<String> {
427+
WalkDir::new(dir)
428+
.into_iter()
429+
.filter_map(|e| e.ok())
430+
.filter(|e| e.path().extension().is_some_and(|ext| ext == "ttl"))
431+
.map(|e| e.path().display().to_string())
432+
.collect()
433+
}
346434
}

src/common.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ pub fn save_u32_vec(
4646

4747
// TODO: duplicates containers/sequence.rs::save()
4848
fn pack_bits(data: &[u32], bits_per_entry: u8) -> Vec<u8> {
49-
assert!(bits_per_entry > 0 && bits_per_entry as usize <= std::mem::size_of::<usize>() * 8);
50-
5149
let mut output = Vec::new();
5250
let mut current_byte = 0u8;
5351
let mut bit_offset = 0;

src/dictionary.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use hdt::{
55
containers::{self, ControlType, Sequence, vbyte::encode_vbyte},
66
dict_sect_pfc::DictSectPFC,
77
};
8-
use log::{debug, error};
8+
use log::{debug, error, warn};
99
use oxrdf::Term;
1010
use oxrdfio::RdfFormat::NTriples;
1111
use oxrdfio::RdfParser;
@@ -90,8 +90,7 @@ impl FourSectDictBuilder {
9090
object_terms.insert(term_to_hdt_bgp_str(&q.object)?);
9191
}
9292
if dict.predicate_terms.is_empty() {
93-
error!("no triples found in provided RDF");
94-
return Err(anyhow::anyhow!("no triples found in input file").into());
93+
warn!("no triples found in provided RDF");
9594
}
9695

9796
dict.shared_terms = subject_terms.intersection(&object_terms).cloned().collect();
@@ -282,7 +281,11 @@ pub fn compress(set: &BTreeSet<String>, block_size: usize) -> Result<DictSectPFC
282281

283282
// offsets are an increasing list of array indices, therefore the last one will be the largest
284283
// TODO: possible off-by-one compared with the hdt-cpp implementation?
285-
let bits_per_entry = (offsets.last().unwrap().ilog2() + 1) as usize;
284+
let bits_per_entry = if num_terms == 0 {
285+
0
286+
} else {
287+
(offsets.last().unwrap().ilog2() + 1) as usize
288+
};
286289

287290
Ok(DictSectPFC {
288291
num_strings: num_terms,

src/rdf_reader.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@ pub fn convert_to_nt(
4545
)
4646
.into());
4747
};
48-
let quads = RdfParser::from_format(rdf_format).for_reader(source_reader);
48+
let quads = RdfParser::from_format(rdf_format)
49+
.with_base_iri(format!(
50+
"file://{}",
51+
Path::new(&file).file_name().unwrap().to_str().unwrap()
52+
))?
53+
.for_reader(source_reader);
4954
let mut warned = false;
5055
for q in quads {
5156
let q = match q {

tests/resources/rdf-tests

Submodule rdf-tests added at 7d298cb

0 commit comments

Comments
 (0)