Skip to content

Commit b306bd8

Browse files
committed
minor optimizations for convert_to_nt logic
1 parent cd8e2d6 commit b306bd8

2 files changed

Lines changed: 58 additions & 16 deletions

File tree

src/builder.rs

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (c) 2025, Decisym, LLC
22
// Licensed under the BSD 3-Clause License (see LICENSE file in the project root).
33

4-
use crate::rdf_reader::convert_to_nt;
4+
use crate::rdf_reader::{concat_nt, convert_to_nt};
55
use log::{debug, error};
66
use std::{
77
fmt,
@@ -58,20 +58,27 @@ pub fn build_hdt<P: AsRef<Path>, Q: AsRef<Path>>(inputs: &[P], dest: Q) -> Resul
5858
}
5959

6060
let timer = std::time::Instant::now();
61-
let first = inputs[0].as_ref();
62-
let is_nt = inputs.len() == 1
63-
&& first
61+
let all_nt = inputs.iter().all(|p| {
62+
p.as_ref()
6463
.extension()
6564
.and_then(|e| e.to_str())
66-
.is_some_and(|e| e.eq_ignore_ascii_case("nt"));
67-
68-
let (nt_path, _tmp_guard): (PathBuf, Option<tempfile::NamedTempFile>) = if is_nt {
69-
(first.to_path_buf(), None)
70-
} else {
71-
let tmp = tempfile::Builder::new().suffix(".nt").tempfile()?;
72-
convert_to_nt(inputs, tmp.reopen()?)?;
73-
(tmp.path().to_path_buf(), Some(tmp))
74-
};
65+
.is_some_and(|e| e.eq_ignore_ascii_case("nt"))
66+
});
67+
68+
let (nt_path, _tmp_guard): (PathBuf, Option<tempfile::NamedTempFile>) =
69+
match (all_nt, inputs.len()) {
70+
(true, 1) => (inputs[0].as_ref().to_path_buf(), None),
71+
(true, _) => {
72+
let tmp = tempfile::Builder::new().suffix(".nt").tempfile()?;
73+
concat_nt(inputs, tmp.reopen()?)?;
74+
(tmp.path().to_path_buf(), Some(tmp))
75+
}
76+
_ => {
77+
let tmp = tempfile::Builder::new().suffix(".nt").tempfile()?;
78+
convert_to_nt(inputs, tmp.reopen()?)?;
79+
(tmp.path().to_path_buf(), Some(tmp))
80+
}
81+
};
7582

7683
let converted_hdt = hdt::Hdt::read_nt(&nt_path)?;
7784

@@ -82,7 +89,7 @@ pub fn build_hdt<P: AsRef<Path>, Q: AsRef<Path>>(inputs: &[P], dest: Q) -> Resul
8289
.create(true)
8390
.truncate(true)
8491
.open(dest.as_ref())?;
85-
let mut writer = BufWriter::new(out_file);
92+
let mut writer = BufWriter::with_capacity(1 << 20, out_file);
8693
converted_hdt.write(&mut writer)?;
8794
writer.flush()?;
8895

@@ -170,6 +177,23 @@ mod tests {
170177
run_sparql_suite("sparql12")
171178
}
172179

180+
/// Builds an HDT from two `.nt` inputs and checks that the result can be read back.
#[test]
fn multi_nt_concat() -> Result<(), Error> {
    let scratch = tempfile::tempdir()?;
    let dir = scratch.path();
    let (first, second) = (dir.join("a.nt"), dir.join("b.nt"));
    let merged = dir.join("merged.hdt");

    // The first fixture deliberately ends without a newline so that the
    // separator inserted between inputs is exercised.
    std::fs::write(&first, "<http://ex/a> <http://ex/p> <http://ex/o1> .")?;
    std::fs::write(&second, "<http://ex/b> <http://ex/p> <http://ex/o2> .\n")?;

    build_hdt(&[&first, &second], &merged)?;

    // Success criterion: the produced file parses as HDT.
    let hdt_file = std::fs::File::open(&merged)?;
    hdt::Hdt::read(std::io::BufReader::new(hdt_file))?;
    Ok(())
}
196+
173197
fn find_ttl_files<P: AsRef<std::path::Path>>(dir: P) -> Vec<String> {
174198
WalkDir::new(dir)
175199
.into_iter()

src/rdf_reader.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,20 @@ use std::{
1515
};
1616
use url::Url;
1717

18+
/// Capacity in bytes (1 MiB) given to the buffered reader and writer that
/// `convert_to_nt` wraps around its source and destination files.
const IO_BUF: usize = 1 << 20;
19+
1820
pub(crate) fn convert_to_nt<P: AsRef<Path>>(
1921
file_paths: &[P],
2022
output_file: std::fs::File,
2123
) -> Result<(), Error> {
22-
let mut dest_writer = BufWriter::new(output_file);
24+
let mut dest_writer = BufWriter::with_capacity(IO_BUF, output_file);
2325
for file in file_paths {
2426
let file = file.as_ref();
2527
let source = std::fs::File::open(file).map_err(|e| {
2628
error!("Error opening file {}: {e:?}", file.display());
2729
e
2830
})?;
29-
let source_reader = BufReader::new(source);
31+
let source_reader = BufReader::with_capacity(IO_BUF, source);
3032

3133
debug!("converting {} to nt format", file.display());
3234

@@ -92,6 +94,22 @@ pub(crate) fn convert_to_nt<P: AsRef<Path>>(
9294
Ok(())
9395
}
9496

97+
pub(crate) fn concat_nt<P: AsRef<Path>>(
98+
file_paths: &[P],
99+
mut output_file: std::fs::File,
100+
) -> Result<(), Error> {
101+
for file in file_paths {
102+
let file = file.as_ref();
103+
let mut source = std::fs::File::open(file).map_err(|e| {
104+
error!("Error opening file {}: {e:?}", file.display());
105+
e
106+
})?;
107+
io::copy(&mut source, &mut output_file)?;
108+
output_file.write_all(b"\n")?;
109+
}
110+
Ok(())
111+
}
112+
95113
#[cfg(test)]
96114
mod tests {
97115

0 commit comments

Comments
 (0)