11// Copyright (c) 2025, Decisym, LLC
22// Licensed under the BSD 3-Clause License (see LICENSE file in the project root).
33
4- use crate :: rdf_reader:: convert_to_nt;
4+ use crate :: rdf_reader:: { concat_nt , convert_to_nt} ;
55use log:: { debug, error} ;
66use std:: {
77 fmt,
@@ -58,20 +58,27 @@ pub fn build_hdt<P: AsRef<Path>, Q: AsRef<Path>>(inputs: &[P], dest: Q) -> Resul
5858 }
5959
6060 let timer = std:: time:: Instant :: now ( ) ;
61- let first = inputs[ 0 ] . as_ref ( ) ;
62- let is_nt = inputs. len ( ) == 1
63- && first
61+ let all_nt = inputs. iter ( ) . all ( |p| {
62+ p. as_ref ( )
6463 . extension ( )
6564 . and_then ( |e| e. to_str ( ) )
66- . is_some_and ( |e| e. eq_ignore_ascii_case ( "nt" ) ) ;
67-
68- let ( nt_path, _tmp_guard) : ( PathBuf , Option < tempfile:: NamedTempFile > ) = if is_nt {
69- ( first. to_path_buf ( ) , None )
70- } else {
71- let tmp = tempfile:: Builder :: new ( ) . suffix ( ".nt" ) . tempfile ( ) ?;
72- convert_to_nt ( inputs, tmp. reopen ( ) ?) ?;
73- ( tmp. path ( ) . to_path_buf ( ) , Some ( tmp) )
74- } ;
65+ . is_some_and ( |e| e. eq_ignore_ascii_case ( "nt" ) )
66+ } ) ;
67+
68+ let ( nt_path, _tmp_guard) : ( PathBuf , Option < tempfile:: NamedTempFile > ) =
69+ match ( all_nt, inputs. len ( ) ) {
70+ ( true , 1 ) => ( inputs[ 0 ] . as_ref ( ) . to_path_buf ( ) , None ) ,
71+ ( true , _) => {
72+ let tmp = tempfile:: Builder :: new ( ) . suffix ( ".nt" ) . tempfile ( ) ?;
73+ concat_nt ( inputs, tmp. reopen ( ) ?) ?;
74+ ( tmp. path ( ) . to_path_buf ( ) , Some ( tmp) )
75+ }
76+ _ => {
77+ let tmp = tempfile:: Builder :: new ( ) . suffix ( ".nt" ) . tempfile ( ) ?;
78+ convert_to_nt ( inputs, tmp. reopen ( ) ?) ?;
79+ ( tmp. path ( ) . to_path_buf ( ) , Some ( tmp) )
80+ }
81+ } ;
7582
7683 let converted_hdt = hdt:: Hdt :: read_nt ( & nt_path) ?;
7784
@@ -82,7 +89,7 @@ pub fn build_hdt<P: AsRef<Path>, Q: AsRef<Path>>(inputs: &[P], dest: Q) -> Resul
8289 . create ( true )
8390 . truncate ( true )
8491 . open ( dest. as_ref ( ) ) ?;
85- let mut writer = BufWriter :: new ( out_file) ;
92+ let mut writer = BufWriter :: with_capacity ( 1 << 20 , out_file) ;
8693 converted_hdt. write ( & mut writer) ?;
8794 writer. flush ( ) ?;
8895
@@ -170,6 +177,23 @@ mod tests {
170177 run_sparql_suite ( "sparql12" )
171178 }
172179
/// Builds an HDT from two `.nt` inputs and checks that the result can be read back.
#[test]
fn multi_nt_concat() -> Result<(), Error> {
    let workdir = tempfile::tempdir()?;
    let dir = workdir.path();

    // The first input deliberately has no trailing newline, so the separator
    // between the two inputs is exercised.
    let first_nt = dir.join("a.nt");
    std::fs::write(&first_nt, "<http://ex/a> <http://ex/p> <http://ex/o1> .")?;

    let second_nt = dir.join("b.nt");
    std::fs::write(&second_nt, "<http://ex/b> <http://ex/p> <http://ex/o2> .\n ")?;

    let hdt_path = dir.join("merged.hdt");
    build_hdt(&[&first_nt, &second_nt], &hdt_path)?;

    // Reading the output back is the check: the test fails if `Hdt::read` errors.
    let hdt_file = std::fs::File::open(&hdt_path)?;
    hdt::Hdt::read(std::io::BufReader::new(hdt_file))?;
    Ok(())
}
196+
173197 fn find_ttl_files < P : AsRef < std:: path:: Path > > ( dir : P ) -> Vec < String > {
174198 WalkDir :: new ( dir)
175199 . into_iter ( )
0 commit comments