Skip to content

Commit 9fdab79

Browse files
committed
Change the stack implementation (VecDeque to Vec), clarify and fix multi-file support, and add more integration tests
1 parent 4728c14 commit 9fdab79

11 files changed

Lines changed: 154 additions & 67 deletions

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
# at your option.
66

77
lint:
8-
cargo install cargo-machete
8+
@command -v cargo-machete >/dev/null 2>&1 || cargo install cargo-machete
99
cargo fmt --check
1010
cargo machete
1111
cargo clippy --benches --tests --bins --no-deps --all-features

README.md

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,13 @@ The conversion functionality can also be called directly in Rust. The library su
4242
use json2rdf::json_to_rdf;
4343

4444
// capture conversion results to file
45-
let results = json_to_rdf(&"tests/airplane.json".to_string(), &Some("http://example.com/ns#".to_string()), &Some("output.nt".to_string()));
45+
let results = json_to_rdf(&["tests/airplane.json"], Some("http://example.com/ns#"), Some("output.nt"));
4646

4747
// capture conversion results to an oxrdf::Graph
48-
let results = json_to_rdf(&"tests/airplane.json".to_string(), &Some("http://example.com/ns#".to_string()), &None);
48+
let results = json_to_rdf(&["tests/airplane.json"], Some("http://example.com/ns#"), None);
49+
50+
// multiple input files are merged into a single graph
51+
let results = json_to_rdf(&["a.json", "b.json"], Some("http://example.com/ns#"), None);
4952
```
5053

5154
## License

src/lib.rs

Lines changed: 34 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ use oxrdf::vocab::xsd;
2222
use oxrdf::{BlankNode, Graph, IriParseError, Literal, NamedNodeRef, TripleRef};
2323

2424
use serde_json::{Deserializer, Value};
25-
use std::collections::VecDeque;
2625
use std::fs::File;
2726
use std::io::{BufReader, Write};
2827
use thiserror::Error;
@@ -58,49 +57,50 @@ pub enum Json2RdfError {
5857
/// an output file for saving the generated RDF data.
5958
///
6059
/// # Arguments
61-
/// - `file_path`: Path to the JSON file.
60+
/// - `file_paths`: One or more paths to input JSON files. All files are merged into a single graph.
6261
/// - `namespace`: Optional custom namespace for RDF predicates.
6362
/// - `output_file`: Optional output file path for writing RDF data.
6463
///
6564
/// # Errors
66-
/// Returns [`Json2RdfError`] if the input file cannot be read, the JSON cannot be parsed,
65+
/// Returns [`Json2RdfError`] if any input file cannot be read, the JSON cannot be parsed,
6766
/// the output file cannot be written, or a JSON key produces an invalid IRI.
6867
///
6968
/// # Example
7069
/// ```rust
7170
/// use json2rdf::json_to_rdf;
7271
///
73-
/// json_to_rdf(
74-
/// &"tests/airplane.json".to_string(),
75-
/// &Some("http://example.com/ns#".to_string()),
76-
/// &Some("output.nt".to_string()),
77-
/// ).expect("conversion failed");
72+
/// let graph = json_to_rdf(
73+
/// &["tests/airplane.json"],
74+
/// Some("http://example.com/ns#"),
75+
/// None,
76+
/// )
77+
/// .expect("conversion failed")
78+
/// .expect("expected a graph");
79+
/// assert!(!graph.is_empty());
7880
/// ```
7981
pub fn json_to_rdf(
80-
file_path: &String,
81-
namespace: &Option<String>,
82-
output_file: &Option<String>,
82+
file_paths: &[&str],
83+
namespace: Option<&str>,
84+
output_file: Option<&str>,
8385
) -> Result<Option<Graph>, Json2RdfError> {
84-
let mut prefix: String = if namespace.is_some() {
85-
namespace.clone().unwrap()
86-
} else {
87-
"https://decisym.ai/json2rdf/model".to_owned()
88-
};
86+
let mut prefix: String = namespace
87+
.map(str::to_owned)
88+
.unwrap_or_else(|| "https://decisym.ai/json2rdf/model".to_owned());
8989
// Respect hash (`#`), slash (`/`), and colon (`:`) terminators; otherwise default to `/`.
9090
if !prefix.ends_with(['#', '/', ':']) {
9191
prefix.push('/');
9292
}
9393

94-
let file = File::open(file_path)?;
95-
let reader = BufReader::new(file);
96-
let stream = Deserializer::from_reader(reader).into_iter::<Value>();
97-
9894
let mut graph = Graph::default(); // oxrdf Graph object
99-
100-
let mut subject_stack: VecDeque<BlankNode> = VecDeque::new();
101-
102-
for value in stream {
103-
process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?;
95+
let mut subject_stack: Vec<BlankNode> = Vec::new();
96+
97+
for path in file_paths {
98+
let file = File::open(path)?;
99+
let reader = BufReader::new(file);
100+
let stream = Deserializer::from_reader(reader).into_iter::<Value>();
101+
for value in stream {
102+
process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?;
103+
}
104104
}
105105

106106
if let Some(output_path) = output_file {
@@ -118,22 +118,22 @@ pub fn json_to_rdf(
118118
/// share predicate state with each other. Root-level primitives have no predicate
119119
/// context and are rejected with [`Json2RdfError::UnsupportedRootValue`].
120120
fn process_top_level(
121-
subject_stack: &mut VecDeque<BlankNode>,
121+
subject_stack: &mut Vec<BlankNode>,
122122
value: Value,
123123
graph: &mut Graph,
124-
prefix: &String,
124+
prefix: &str,
125125
) -> Result<(), Json2RdfError> {
126126
match value {
127127
Value::Object(obj) => {
128128
let subject = BlankNode::default();
129-
subject_stack.push_back(subject);
129+
subject_stack.push(subject);
130130

131131
for (key, val) in obj {
132132
let property = Some(format!("{}{}", prefix, key));
133133
process_value(subject_stack, &property, val, graph, prefix)?;
134134
}
135135

136-
subject_stack.pop_back();
136+
subject_stack.pop();
137137
Ok(())
138138
}
139139
Value::Array(arr) => {
@@ -176,13 +176,13 @@ fn process_top_level(
176176
/// - **Boolean**: Converts to `xsd:boolean` literal.
177177
/// - **Number**: Converts to `xsd:integer` for whole numbers, `xsd:double` for floating-point values.
178178
fn process_value(
179-
subject_stack: &mut VecDeque<BlankNode>,
179+
subject_stack: &mut Vec<BlankNode>,
180180
property: &Option<String>,
181181
value: Value,
182182
graph: &mut Graph,
183-
prefix: &String,
183+
prefix: &str,
184184
) -> Result<(), Json2RdfError> {
185-
let Some(last_subject) = subject_stack.back().cloned() else {
185+
let Some(last_subject) = subject_stack.last().cloned() else {
186186
return Ok(());
187187
};
188188
let Some(prop) = property else {
@@ -226,13 +226,13 @@ fn process_value(
226226
Value::Object(obj) => {
227227
let new_subject = BlankNode::default();
228228
graph.insert(TripleRef::new(&last_subject, predicate, &new_subject));
229-
subject_stack.push_back(new_subject);
229+
subject_stack.push(new_subject);
230230

231231
for (key, val) in obj {
232232
let nested_property: Option<String> = Some(format!("{}{}", prefix, key));
233233
process_value(subject_stack, &nested_property, val, graph, prefix)?;
234234
}
235-
subject_stack.pop_back();
235+
subject_stack.pop();
236236
}
237237
Value::Array(arr) => {
238238
for val in arr {

src/main.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ enum Commands {
5959
/// Path to input JSON file(s).
6060
///
6161
/// Provide the path to one or more JSON files that will be parsed and converted.
62-
#[arg(short, long)]
63-
json_files: String,
62+
/// All files are merged into a single RDF graph.
63+
#[arg(short, long, required = true, num_args = 1..)]
64+
json_files: Vec<String>,
6465

6566
/// Path to output file.
6667
///
@@ -80,7 +81,8 @@ fn main() {
8081
json_files,
8182
output_file,
8283
}) => {
83-
if let Err(e) = json_to_rdf(json_files, namespace, output_file) {
84+
let paths: Vec<&str> = json_files.iter().map(String::as_str).collect();
85+
if let Err(e) = json_to_rdf(&paths, namespace.as_deref(), output_file.as_deref()) {
8486
eprintln!("json2rdf: {}", e);
8587
std::process::exit(1);
8688
}

tests/empty_file.json

Whitespace-only changes.

tests/empty_object.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
{}

tests/integration_test.rs

Lines changed: 104 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -10,73 +10,65 @@ use std::fs::{self, File};
1010

1111
#[test]
1212
fn test_graph_triple_count() {
13-
let triple_count_string = json_to_rdf(&"tests/airplane.json".to_string(), &None, &None);
13+
let triple_count_string = json_to_rdf(&["tests/airplane.json"], None, None);
1414

1515
assert!(triple_count_string.is_ok());
1616
assert_eq!(triple_count_string.unwrap().unwrap().len(), 23);
1717
}
1818

1919
#[test]
2020
fn test_graph_write() {
21-
let output = "out.nt".to_string();
21+
let output = "out.nt";
2222

23-
let res = json_to_rdf(
24-
&"tests/airplane.json".to_string(),
25-
&None,
26-
&Some(output.clone()),
27-
);
23+
let res = json_to_rdf(&["tests/airplane.json"], None, Some(output));
2824

2925
assert!(res.is_ok());
3026
assert!(res.unwrap().is_none());
3127

32-
let f = File::open(output.clone()).expect("unable to open output file for result verification");
28+
let f = File::open(output).expect("unable to open output file for result verification");
3329
let quads = RdfParser::from_format(RdfFormat::NTriples)
3430
.for_reader(f)
3531
.collect::<Result<Vec<_>, _>>()
3632
.expect("failed to parse generated output file");
3733

3834
assert_eq!(quads.len(), 23);
39-
let _ = fs::remove_file(output.clone());
35+
let _ = fs::remove_file(output);
4036
}
4137

4238
#[test]
4339
fn test_graph_write_truncates_existing() {
44-
let output = "out_truncate.nt".to_string();
40+
let output = "out_truncate.nt";
4541

4642
// Pre-populate with junk to prove truncation happens.
47-
fs::write(&output, "stale garbage\n").expect("unable to seed stale output");
43+
fs::write(output, "stale garbage\n").expect("unable to seed stale output");
4844

4945
// Two writes in a row should not accumulate; final file should hold one run's worth.
5046
for _ in 0..2 {
51-
let res = json_to_rdf(
52-
&"tests/airplane.json".to_string(),
53-
&None,
54-
&Some(output.clone()),
55-
);
47+
let res = json_to_rdf(&["tests/airplane.json"], None, Some(output));
5648
assert!(res.is_ok());
5749
}
5850

59-
let f = File::open(&output).expect("unable to open output file for result verification");
51+
let f = File::open(output).expect("unable to open output file for result verification");
6052
let quads = RdfParser::from_format(RdfFormat::NTriples)
6153
.for_reader(f)
6254
.collect::<Result<Vec<_>, _>>()
6355
.expect("failed to parse generated output file");
6456

6557
assert_eq!(quads.len(), 23);
66-
let _ = fs::remove_file(&output);
58+
let _ = fs::remove_file(output);
6759
}
6860

6961
#[test]
7062
fn test_root_array() {
71-
let graph = json_to_rdf(&"tests/root_array.json".to_string(), &None, &None)
63+
let graph = json_to_rdf(&["tests/root_array.json"], None, None)
7264
.expect("conversion failed")
7365
.expect("expected graph");
7466
assert_eq!(graph.len(), 2);
7567
}
7668

7769
#[test]
7870
fn test_root_primitive_errors() {
79-
let result = json_to_rdf(&"tests/root_primitive.json".to_string(), &None, &None);
71+
let result = json_to_rdf(&["tests/root_primitive.json"], None, None);
8072
assert!(matches!(
8173
result,
8274
Err(Json2RdfError::UnsupportedRootValue { kind: "number" })
@@ -85,18 +77,103 @@ fn test_root_primitive_errors() {
8577

8678
#[test]
8779
fn test_ndjson_stream_isolated() {
88-
let graph = json_to_rdf(&"tests/ndjson.json".to_string(), &None, &None)
80+
let graph = json_to_rdf(&["tests/ndjson.json"], None, None)
8981
.expect("conversion failed")
9082
.expect("expected graph");
9183
assert_eq!(graph.len(), 2);
9284
}
9385

86+
#[test]
87+
fn test_multi_file_merges_graphs() {
88+
// root_array.json → 2 triples; ndjson.json → 2 triples; merged = 4.
89+
let graph = json_to_rdf(&["tests/root_array.json", "tests/ndjson.json"], None, None)
90+
.expect("conversion failed")
91+
.expect("expected graph");
92+
assert_eq!(graph.len(), 4);
93+
}
94+
95+
#[test]
96+
fn test_large_integers_preserve_precision() {
97+
let graph = json_to_rdf(&["tests/large_int.json"], None, None)
98+
.expect("conversion failed")
99+
.expect("expected graph");
100+
let serialized = graph.to_string();
101+
assert!(
102+
serialized.contains("\"9223372036854775807\"^^<http://www.w3.org/2001/XMLSchema#integer>"),
103+
"i64::MAX should round-trip as xsd:integer, got:\n{}",
104+
serialized
105+
);
106+
assert!(
107+
serialized.contains("\"18446744073709551615\"^^<http://www.w3.org/2001/XMLSchema#integer>"),
108+
"u64::MAX should round-trip as xsd:integer (not xsd:double), got:\n{}",
109+
serialized
110+
);
111+
}
112+
113+
#[test]
114+
fn test_empty_file_returns_empty_graph() {
115+
let graph = json_to_rdf(&["tests/empty_file.json"], None, None)
116+
.expect("conversion failed")
117+
.expect("expected graph");
118+
assert_eq!(graph.len(), 0);
119+
}
120+
121+
#[test]
122+
fn test_empty_object_at_root_produces_no_triples() {
123+
let graph = json_to_rdf(&["tests/empty_object.json"], None, None)
124+
.expect("conversion failed")
125+
.expect("expected graph");
126+
assert_eq!(graph.len(), 0);
127+
}
128+
129+
#[test]
130+
fn test_malformed_json_returns_error() {
131+
let result = json_to_rdf(&["tests/malformed.json"], None, None);
132+
assert!(
133+
matches!(result, Err(Json2RdfError::Json(_))),
134+
"expected Json2RdfError::Json, got {:?}",
135+
result.err()
136+
);
137+
}
138+
139+
#[test]
140+
fn test_missing_file_returns_error() {
141+
let result = json_to_rdf(&["tests/does_not_exist.json"], None, None);
142+
assert!(
143+
matches!(result, Err(Json2RdfError::Io(_))),
144+
"expected Json2RdfError::Io, got {:?}",
145+
result.err()
146+
);
147+
}
148+
149+
#[test]
150+
fn test_invalid_iri_key_returns_error() {
151+
let result = json_to_rdf(&["tests/invalid_iri_key.json"], None, None);
152+
assert!(
153+
matches!(result, Err(Json2RdfError::InvalidIri { .. })),
154+
"expected Json2RdfError::InvalidIri, got {:?}",
155+
result.err()
156+
);
157+
}
158+
159+
#[test]
160+
fn test_unicode_key_in_iri_range() {
161+
let graph = json_to_rdf(&["tests/unicode_key.json"], None, None)
162+
.expect("conversion failed")
163+
.expect("expected graph");
164+
assert_eq!(graph.len(), 1);
165+
assert!(
166+
graph.to_string().contains("中文"),
167+
"expected unicode predicate in output"
168+
);
169+
}
170+
94171
#[test]
95172
fn test_hash_namespace_not_mangled() {
96173
let graph = json_to_rdf(
97-
&"tests/airplane.json".to_string(),
98-
&Some("http://example.com/ns#".to_string()),
99-
&None,
174+
&["tests/airplane.json"],
175+
Some("http://example.com/ns#"),
176+
None,
100177
)
101178
.expect("conversion failed")
102179
.expect("expected graph");
@@ -117,9 +194,9 @@ fn test_hash_namespace_not_mangled() {
117194
#[test]
118195
fn test_slash_namespace_no_double_slash() {
119196
let graph = json_to_rdf(
120-
&"tests/airplane.json".to_string(),
121-
&Some("http://example.com/ns/".to_string()),
122-
&None,
197+
&["tests/airplane.json"],
198+
Some("http://example.com/ns/"),
199+
None,
123200
)
124201
.expect("conversion failed")
125202
.expect("expected graph");

0 commit comments

Comments
 (0)