From 477122e8256f5848d3017eb082eb7ae005eb20a8 Mon Sep 17 00:00:00 2001 From: Gregory Hanson Date: Wed, 15 Apr 2026 09:32:35 -0400 Subject: [PATCH 1/4] return dedicated error types, iri checks, xsd:integer and xsd:float fixes --- Cargo.lock | 1 + Cargo.toml | 1 + src/lib.rs | 181 ++++++++++++++++++++++++++++++---------------------- src/main.rs | 10 +-- 4 files changed, 111 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2730abf..29694f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,6 +148,7 @@ dependencies = [ "oxrdf", "oxrdfio", "serde_json", + "thiserror", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 40e4ba1..b4a9c28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ categories = ["command-line-utilities", "encoding", "parser-implementations", "s clap = { version = "4.6", features = ["derive"] } oxrdf = "0.3" serde_json = "1.0" +thiserror = "2" [dev-dependencies] oxrdfio = "0.2" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 137bb9e..415ae0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,14 +18,34 @@ //! - Allows specifying a custom RDF namespace for generated predicates and objects. //! - Outputs the RDF data to a specified file or prints it to the console. -use clap::Error; use oxrdf::vocab::xsd; -use oxrdf::{BlankNode, Graph, Literal, NamedNodeRef, TripleRef}; +use oxrdf::{BlankNode, Graph, IriParseError, Literal, NamedNodeRef, TripleRef}; use serde_json::{Deserializer, Value}; use std::collections::VecDeque; use std::fs::{File, OpenOptions}; use std::io::{BufReader, Write}; +use thiserror::Error; + +/// Errors that can occur while converting JSON to RDF. +#[derive(Debug, Error)] +pub enum Json2RdfError { + /// Failure opening, reading, or writing a file. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Failure parsing the input JSON. + #[error("JSON parse error: {0}")] + Json(#[from] serde_json::Error), + + /// A JSON key produced a string that is not a valid IRI. + #[error("invalid IRI {iri:?} generated from JSON key: {source}")] + InvalidIri { + iri: String, + #[source] + source: IriParseError, + }, +} /// Converts JSON data to RDF format. /// @@ -38,24 +58,32 @@ use std::io::{BufReader, Write}; /// - `namespace`: Optional custom namespace for RDF predicates. /// - `output_file`: Optional output file path for writing RDF data. /// +/// # Errors +/// Returns [`Json2RdfError`] if the input file cannot be read, the JSON cannot be parsed, +/// the output file cannot be written, or a JSON key produces an invalid IRI. +/// /// # Example /// ```rust /// use json2rdf::json_to_rdf; /// -/// json_to_rdf(&"tests/airplane.json".to_string(), &Some("http://example.com/ns#".to_string()), &Some("output.nt".to_string())); +/// json_to_rdf( +/// &"tests/airplane.json".to_string(), +/// &Some("http://example.com/ns#".to_string()), +/// &Some("output.nt".to_string()), +/// ).expect("conversion failed"); /// ``` pub fn json_to_rdf( file_path: &String, namespace: &Option, output_file: &Option, -) -> Result, Error> { +) -> Result, Json2RdfError> { let rdf_namespace: String = if namespace.is_some() { namespace.clone().unwrap() } else { "https://decisym.ai/json2rdf/model".to_owned() }; - let file = File::open(file_path).unwrap(); + let file = File::open(file_path)?; let reader = BufReader::new(file); let stream = Deserializer::from_reader(reader).into_iter::(); @@ -65,10 +93,11 @@ pub fn json_to_rdf( let mut property: Option = None; for value in stream { + let value = value?; match value { - Ok(Value::Object(obj)) => { + Value::Object(obj) => { let subject = BlankNode::default(); // Create a new blank node - subject_stack.push_back(subject.clone()); + subject_stack.push_back(subject); for (key, val) in obj { property = Some(format!("{}/{}", rdf_namespace, key)); @@ -78,33 +107,30 @@ pub fn json_to_rdf( val, &mut graph, &rdf_namespace, - ); + )?; } subject_stack.pop_back(); } - Ok(Value::Array(arr)) => { + Value::Array(arr) => { for val in arr { process_value( &mut subject_stack, &property, val, &mut graph, - &rdf_namespace.clone(), - ); + &rdf_namespace, + )?; } } - Ok(other) => { + other => { process_value( &mut subject_stack, &property, other, &mut graph, - &rdf_namespace.clone(), - ); - } - Err(e) => { - eprintln!("Error parsing JSON: {}", e); + &rdf_namespace, + )?; } } } @@ -113,10 +139,9 @@ pub fn json_to_rdf( let mut file = OpenOptions::new() .create(true) .append(true) - .open(output_path) - .expect("Error opening file"); + .open(output_path)?; - writeln!(file, "{}", graph).expect("Error writing json2rdf data to file"); + writeln!(file, "{}", graph)?; Ok(None) } else { Ok(Some(graph)) @@ -147,77 +172,77 @@ pub fn json_to_rdf( /// - **Array**: Iterates over elements and processes each as a separate value. /// - **String**: Converts to `xsd:string` literal. /// - **Boolean**: Converts to `xsd:boolean` literal. -/// - **Number**: Converts to `xsd:int` or `xsd:float` literal based on value type. +/// - **Number**: Converts to `xsd:integer` for whole numbers, `xsd:double` for floating-point values. fn process_value( subject_stack: &mut VecDeque, property: &Option, value: Value, graph: &mut Graph, namespace: &String, -) { +) -> Result<(), Json2RdfError> { let ns = if namespace.ends_with("/") { namespace } else { &([namespace, "/"].join("")) }; - if let Some(last_subject) = subject_stack.clone().back() { - if let Some(prop) = property { - match value { - Value::Bool(b) => { - graph.insert(TripleRef::new( - subject_stack.back().unwrap(), - NamedNodeRef::new(prop.as_str()).unwrap(), - &Literal::new_typed_literal(b.to_string(), xsd::BOOLEAN), - )); - } - Value::Number(num) => { - if num.as_i64().is_some() { - graph.insert(TripleRef::new( - subject_stack.back().unwrap(), - NamedNodeRef::new(prop.as_str()).unwrap(), - &Literal::new_typed_literal(num.to_string(), xsd::INT), - )); - } else if num.as_f64().is_some() { - graph.insert(TripleRef::new( - subject_stack.back().unwrap(), - NamedNodeRef::new(prop.as_str()).unwrap(), - &Literal::new_typed_literal(num.to_string(), xsd::FLOAT), - )); - } - } - Value::String(s) => { - graph.insert(TripleRef::new( - subject_stack.back().unwrap(), - NamedNodeRef::new(prop.as_str()).unwrap(), - &Literal::new_typed_literal(s, xsd::STRING), - )); - } - Value::Null => { - //println!("Null value"); - } - Value::Object(obj) => { - let subject = BlankNode::default(); - subject_stack.push_back(subject); - - graph.insert(TripleRef::new( - last_subject, - NamedNodeRef::new(prop.as_str()).unwrap(), - subject_stack.back().unwrap(), - )); - - for (key, val) in obj { - let nested_property: Option = Some(format!("{}{}", ns, key)); - process_value(subject_stack, &nested_property, val, graph, ns); - } - subject_stack.pop_back(); - } - Value::Array(arr) => { - for val in arr { - process_value(subject_stack, property, val, graph, ns); - } - } + let Some(last_subject) = subject_stack.back().cloned() else { + return Ok(()); + }; + let Some(prop) = property else { + return Ok(()); + }; + + let predicate = + NamedNodeRef::new(prop.as_str()).map_err(|source| Json2RdfError::InvalidIri { + iri: prop.clone(), + source, + })?; + + match value { + Value::Bool(b) => { + graph.insert(TripleRef::new( + &last_subject, + predicate, + &Literal::new_typed_literal(b.to_string(), xsd::BOOLEAN), + )); + } + Value::Number(num) => { + let datatype = if num.is_i64() || num.is_u64() { + xsd::INTEGER + } else { + xsd::DOUBLE + }; + graph.insert(TripleRef::new( + &last_subject, + predicate, + &Literal::new_typed_literal(num.to_string(), datatype), + )); + } + Value::String(s) => { + graph.insert(TripleRef::new( + &last_subject, + predicate, + &Literal::new_typed_literal(s, xsd::STRING), + )); + } + Value::Null => {} + Value::Object(obj) => { + let new_subject = BlankNode::default(); + graph.insert(TripleRef::new(&last_subject, predicate, &new_subject)); + subject_stack.push_back(new_subject); + + for (key, val) in obj { + let nested_property: Option = Some(format!("{}{}", ns, key)); + process_value(subject_stack, &nested_property, val, graph, ns)?; + } + subject_stack.pop_back(); + } + Value::Array(arr) => { + for val in arr { + process_value(subject_stack, property, val, graph, ns)?; } } } + Ok(()) } diff --git a/src/main.rs b/src/main.rs index 57004e4..ecadbbc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,10 +79,12 @@ fn main() { namespace, json_files, output_file, - }) => match json_to_rdf(json_files, namespace, output_file) { - Ok(_) => {} - Err(e) => eprintln!("Error writing: {}", e), - }, + }) => { + if let Err(e) = json_to_rdf(json_files, namespace, output_file) { + eprintln!("json2rdf: {}", e); + std::process::exit(1); + } + } None => {} } } From 4728c1499ad37ed35c80401bf9162568af4ab565 Mon Sep 17 00:00:00 2001 From: Gregory Hanson Date: Wed, 15 Apr 2026 10:37:28 -0400 Subject: [PATCH 2/4] namespace validation, additional json validation and tests, bug where output file was appended --- src/lib.rs | 114 ++++++++++++++++++-------------------- tests/integration_test.rs | 101 ++++++++++++++++++++++++++++++++- tests/ndjson.json | 2 + tests/root_array.json | 1 + tests/root_primitive.json | 1 + 5 files changed, 158 insertions(+), 61 deletions(-) create mode 100644 tests/ndjson.json create mode 100644 tests/root_array.json create mode 100644 tests/root_primitive.json diff --git a/src/lib.rs b/src/lib.rs index 415ae0a..4431671 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ use oxrdf::{BlankNode, Graph, IriParseError, Literal, NamedNodeRef, TripleRef}; use serde_json::{Deserializer, Value}; use std::collections::VecDeque; -use std::fs::{File, OpenOptions}; +use std::fs::File; use std::io::{BufReader, Write}; use thiserror::Error; @@ -45,6 +45,10 @@ pub enum Json2RdfError { #[source] source: IriParseError, }, + + /// A root-level JSON value has no predicate context and cannot be converted to a triple. + #[error("unsupported root-level JSON {kind}; root must be an object or array")] + UnsupportedRootValue { kind: &'static str }, } /// Converts JSON data to RDF format. @@ -77,11 +81,15 @@ pub fn json_to_rdf( namespace: &Option, output_file: &Option, ) -> Result, Json2RdfError> { - let rdf_namespace: String = if namespace.is_some() { + let mut prefix: String = if namespace.is_some() { namespace.clone().unwrap() } else { "https://decisym.ai/json2rdf/model".to_owned() }; + // Respect hash (`#`), slash (`/`), and colon (`:`) terminators; otherwise default to `/`. + if !prefix.ends_with(['#', '/', ':']) { + prefix.push('/'); + } let file = File::open(file_path)?; let reader = BufReader::new(file); @@ -90,57 +98,13 @@ pub fn json_to_rdf( let mut graph = Graph::default(); // oxrdf Graph object let mut subject_stack: VecDeque = VecDeque::new(); - let mut property: Option = None; for value in stream { - let value = value?; - match value { - Value::Object(obj) => { - let subject = BlankNode::default(); // Create a new blank node - subject_stack.push_back(subject); - - for (key, val) in obj { - property = Some(format!("{}/{}", rdf_namespace, key)); - process_value( - &mut subject_stack, - &property, - val, - &mut graph, - &rdf_namespace, - )?; - } - - subject_stack.pop_back(); - } - Value::Array(arr) => { - for val in arr { - process_value( - &mut subject_stack, - &property, - val, - &mut graph, - &rdf_namespace, - )?; - } - } - other => { - process_value( - &mut subject_stack, - &property, - other, - &mut graph, - &rdf_namespace, - )?; - } - } + process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?; } if let Some(output_path) = output_file { - let mut file = OpenOptions::new() - .create(true) - .append(true) - .open(output_path)?; - + let mut file = File::create(output_path)?; writeln!(file, "{}", graph)?; Ok(None) } else { @@ -148,6 +112,43 @@ pub fn json_to_rdf( } } +/// Processes a single top-level JSON value from the input stream. +/// +/// Each top-level value is handled independently: streamed values (NDJSON) do not +/// share predicate state with each other. Root-level primitives have no predicate +/// context and are rejected with [`Json2RdfError::UnsupportedRootValue`]. +fn process_top_level( + subject_stack: &mut VecDeque, + value: Value, + graph: &mut Graph, + prefix: &String, +) -> Result<(), Json2RdfError> { + match value { + Value::Object(obj) => { + let subject = BlankNode::default(); + subject_stack.push_back(subject); + + for (key, val) in obj { + let property = Some(format!("{}{}", prefix, key)); + process_value(subject_stack, &property, val, graph, prefix)?; + } + + subject_stack.pop_back(); + Ok(()) + } + Value::Array(arr) => { + for item in arr { + process_top_level(subject_stack, item, graph, prefix)?; + } + Ok(()) + } + Value::Bool(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "boolean" }), + Value::Number(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "number" }), + Value::String(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "string" }), + Value::Null => Err(Json2RdfError::UnsupportedRootValue { kind: "null" }), + } +} + /// This function handles different JSON data types, converting each into RDF triples: /// - JSON Objects create new blank nodes and recursively process nested values. /// - JSON Arrays iterate over each element and process it as an individual value. @@ -165,7 +166,8 @@ pub fn json_to_rdf( /// - `property`: RDF predicate (property) associated with the JSON value. /// - `value`: JSON value to process. /// - `graph`: RDF graph where triples are added. -/// - `namespace`: Namespace for generating predicate URIs. +/// - `prefix`: Fully-prepared namespace prefix (already terminated with `#`, `/`, or `:`) +/// used to build predicate IRIs by direct concatenation with each JSON key. /// /// # JSON Type to RDF Conversion /// - **Object**: Creates a blank node and recursively processes key-value pairs. @@ -178,14 +180,8 @@ fn process_value( property: &Option, value: Value, graph: &mut Graph, - namespace: &String, + prefix: &String, ) -> Result<(), Json2RdfError> { - let ns = if namespace.ends_with("/") { - namespace - } else { - &([namespace, "/"].join("")) - }; - let Some(last_subject) = subject_stack.back().cloned() else { return Ok(()); }; @@ -233,14 +229,14 @@ fn process_value( subject_stack.push_back(new_subject); for (key, val) in obj { - let nested_property: Option = Some(format!("{}{}", ns, key)); - process_value(subject_stack, &nested_property, val, graph, ns)?; + let nested_property: Option = Some(format!("{}{}", prefix, key)); + process_value(subject_stack, &nested_property, val, graph, prefix)?; } subject_stack.pop_back(); } Value::Array(arr) => { for val in arr { - process_value(subject_stack, property, val, graph, ns)?; + process_value(subject_stack, property, val, graph, prefix)?; } } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 1626779..80edf36 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -4,7 +4,7 @@ // - BSD 3-Clause License (https://opensource.org/licenses/BSD-3-Clause) // at your option. -use json2rdf::json_to_rdf; +use json2rdf::{json_to_rdf, Json2RdfError}; use oxrdfio::{RdfFormat, RdfParser}; use std::fs::{self, File}; @@ -19,7 +19,6 @@ fn test_graph_triple_count() { #[test] fn test_graph_write() { let output = "out.nt".to_string(); - let _ = fs::remove_file(output.clone()); let res = json_to_rdf( &"tests/airplane.json".to_string(), @@ -39,3 +38,101 @@ fn test_graph_write() { assert_eq!(quads.len(), 23); let _ = fs::remove_file(output.clone()); } + +#[test] +fn test_graph_write_truncates_existing() { + let output = "out_truncate.nt".to_string(); + + // Pre-populate with junk to prove truncation happens. + fs::write(&output, "stale garbage\n").expect("unable to seed stale output"); + + // Two writes in a row should not accumulate; final file should hold one run's worth. + for _ in 0..2 { + let res = json_to_rdf( + &"tests/airplane.json".to_string(), + &None, + &Some(output.clone()), + ); + assert!(res.is_ok()); + } + + let f = File::open(&output).expect("unable to open output file for result verification"); + let quads = RdfParser::from_format(RdfFormat::NTriples) + .for_reader(f) + .collect::, _>>() + .expect("failed to parse generated output file"); + + assert_eq!(quads.len(), 23); + let _ = fs::remove_file(&output); +} + +#[test] +fn test_root_array() { + let graph = json_to_rdf(&"tests/root_array.json".to_string(), &None, &None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 2); +} + +#[test] +fn test_root_primitive_errors() { + let result = json_to_rdf(&"tests/root_primitive.json".to_string(), &None, &None); + assert!(matches!( + result, + Err(Json2RdfError::UnsupportedRootValue { kind: "number" }) + )); +} + +#[test] +fn test_ndjson_stream_isolated() { + let graph = json_to_rdf(&"tests/ndjson.json".to_string(), &None, &None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 2); +} + +#[test] +fn test_hash_namespace_not_mangled() { + let graph = json_to_rdf( + &"tests/airplane.json".to_string(), + &Some("http://example.com/ns#".to_string()), + &None, + ) + .expect("conversion failed") + .expect("expected graph"); + + let serialized = graph.to_string(); + assert!( + serialized.contains(""), + "expected predicate to use hash namespace without injected '/', got:\n{}", + serialized + ); + assert!( + !serialized.contains(""), + "expected predicate to use slash namespace without double slash, got:\n{}", + serialized + ); +} diff --git a/tests/ndjson.json b/tests/ndjson.json new file mode 100644 index 0000000..114733c --- /dev/null +++ b/tests/ndjson.json @@ -0,0 +1,2 @@ +{"a": 1} +{"b": 2} diff --git a/tests/root_array.json b/tests/root_array.json new file mode 100644 index 0000000..fb50bce --- /dev/null +++ b/tests/root_array.json @@ -0,0 +1 @@ +[{"a": 1}, {"b": 2}] diff --git a/tests/root_primitive.json b/tests/root_primitive.json new file mode 100644 index 0000000..d81cc07 --- /dev/null +++ b/tests/root_primitive.json @@ -0,0 +1 @@ +42 From 9fdab79e01480c589dce1bf003a81d7ed4d338a9 Mon Sep 17 00:00:00 2001 From: Gregory Hanson Date: Wed, 15 Apr 2026 14:20:59 -0400 Subject: [PATCH 3/4] change in stack implementation, multi-file support clarity/bug, more integ tests --- Makefile | 2 +- README.md | 7 +- src/lib.rs | 68 +++++++++---------- src/main.rs | 8 ++- tests/empty_file.json | 0 tests/empty_object.json | 1 + tests/integration_test.rs | 131 +++++++++++++++++++++++++++++-------- tests/invalid_iri_key.json | 1 + tests/large_int.json | 1 + tests/malformed.json | 1 + tests/unicode_key.json | 1 + 11 files changed, 154 insertions(+), 67 deletions(-) create mode 100644 tests/empty_file.json create mode 100644 tests/empty_object.json create mode 100644 tests/invalid_iri_key.json create mode 100644 tests/large_int.json create mode 100644 tests/malformed.json create mode 100644 tests/unicode_key.json diff --git a/Makefile b/Makefile index 39a6b84..3db913a 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ # at your option. lint: - cargo install cargo-machete + @command -v cargo-machete >/dev/null 2>&1 || cargo install cargo-machete cargo fmt --check cargo machete cargo clippy --benches --tests --bins --no-deps --all-features diff --git a/README.md b/README.md index 8fe8b5c..7efdd21 100644 --- a/README.md +++ b/README.md @@ -42,10 +42,13 @@ The conversion functionality can also be called directly in Rust. The library su use json2rdf::json_to_rdf; // capture conversion results to file -let results = json_to_rdf(&"tests/airplane.json".to_string(), &Some("http://example.com/ns#".to_string()), &Some("output.nt".to_string())); +let results = json_to_rdf(&["tests/airplane.json"], Some("http://example.com/ns#"), Some("output.nt")); // capture conversion results to an oxrdf::Graph -let results = json_to_rdf(&"tests/airplane.json".to_string(), &Some("http://example.com/ns#".to_string()), &None); +let results = json_to_rdf(&["tests/airplane.json"], Some("http://example.com/ns#"), None); + +// multiple input files are merged into a single graph +let results = json_to_rdf(&["a.json", "b.json"], Some("http://example.com/ns#"), None); ``` ## License diff --git a/src/lib.rs b/src/lib.rs index 4431671..26f7da3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,6 @@ use oxrdf::vocab::xsd; use oxrdf::{BlankNode, Graph, IriParseError, Literal, NamedNodeRef, TripleRef}; use serde_json::{Deserializer, Value}; -use std::collections::VecDeque; use std::fs::File; use std::io::{BufReader, Write}; use thiserror::Error; @@ -58,49 +57,50 @@ pub enum Json2RdfError { /// an output file for saving the generated RDF data. /// /// # Arguments -/// - `file_path`: Path to the JSON file. +/// - `file_paths`: One or more paths to input JSON files. All files are merged into a single graph. /// - `namespace`: Optional custom namespace for RDF predicates. /// - `output_file`: Optional output file path for writing RDF data. /// /// # Errors -/// Returns [`Json2RdfError`] if the input file cannot be read, the JSON cannot be parsed, +/// Returns [`Json2RdfError`] if any input file cannot be read, the JSON cannot be parsed, /// the output file cannot be written, or a JSON key produces an invalid IRI. /// /// # Example /// ```rust /// use json2rdf::json_to_rdf; /// -/// json_to_rdf( -/// &"tests/airplane.json".to_string(), -/// &Some("http://example.com/ns#".to_string()), -/// &Some("output.nt".to_string()), -/// ).expect("conversion failed"); +/// let graph = json_to_rdf( +/// &["tests/airplane.json"], +/// Some("http://example.com/ns#"), +/// None, +/// ) +/// .expect("conversion failed") +/// .expect("expected a graph"); +/// assert!(!graph.is_empty()); /// ``` pub fn json_to_rdf( - file_path: &String, - namespace: &Option, - output_file: &Option, + file_paths: &[&str], + namespace: Option<&str>, + output_file: Option<&str>, ) -> Result, Json2RdfError> { - let mut prefix: String = if namespace.is_some() { - namespace.clone().unwrap() - } else { - "https://decisym.ai/json2rdf/model".to_owned() - }; + let mut prefix: String = namespace + .map(str::to_owned) + .unwrap_or_else(|| "https://decisym.ai/json2rdf/model".to_owned()); // Respect hash (`#`), slash (`/`), and colon (`:`) terminators; otherwise default to `/`. if !prefix.ends_with(['#', '/', ':']) { prefix.push('/'); } - let file = File::open(file_path)?; - let reader = BufReader::new(file); - let stream = Deserializer::from_reader(reader).into_iter::(); - let mut graph = Graph::default(); // oxrdf Graph object - - let mut subject_stack: VecDeque = VecDeque::new(); - - for value in stream { - process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?; + let mut subject_stack: Vec = Vec::new(); + + for path in file_paths { + let file = File::open(path)?; + let reader = BufReader::new(file); + let stream = Deserializer::from_reader(reader).into_iter::(); + for value in stream { + process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?; + } } if let Some(output_path) = output_file { @@ -118,22 +118,22 @@ pub fn json_to_rdf( /// share predicate state with each other. Root-level primitives have no predicate /// context and are rejected with [`Json2RdfError::UnsupportedRootValue`]. fn process_top_level( - subject_stack: &mut VecDeque, + subject_stack: &mut Vec, value: Value, graph: &mut Graph, - prefix: &String, + prefix: &str, ) -> Result<(), Json2RdfError> { match value { Value::Object(obj) => { let subject = BlankNode::default(); - subject_stack.push_back(subject); + subject_stack.push(subject); for (key, val) in obj { let property = Some(format!("{}{}", prefix, key)); process_value(subject_stack, &property, val, graph, prefix)?; } - subject_stack.pop_back(); + subject_stack.pop(); Ok(()) } Value::Array(arr) => { @@ -176,13 +176,13 @@ fn process_top_level( /// - **Boolean**: Converts to `xsd:boolean` literal. /// - **Number**: Converts to `xsd:integer` for whole numbers, `xsd:double` for floating-point values. fn process_value( - subject_stack: &mut VecDeque, + subject_stack: &mut Vec, property: &Option, value: Value, graph: &mut Graph, - prefix: &String, + prefix: &str, ) -> Result<(), Json2RdfError> { - let Some(last_subject) = subject_stack.back().cloned() else { + let Some(last_subject) = subject_stack.last().cloned() else { return Ok(()); }; let Some(prop) = property else { @@ -226,13 +226,13 @@ fn process_value( Value::Object(obj) => { let new_subject = BlankNode::default(); graph.insert(TripleRef::new(&last_subject, predicate, &new_subject)); - subject_stack.push_back(new_subject); + subject_stack.push(new_subject); for (key, val) in obj { let nested_property: Option = Some(format!("{}{}", prefix, key)); process_value(subject_stack, &nested_property, val, graph, prefix)?; } - subject_stack.pop_back(); + subject_stack.pop(); } Value::Array(arr) => { for val in arr { diff --git a/src/main.rs b/src/main.rs index ecadbbc..1f733a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -59,8 +59,9 @@ enum Commands { /// Path to input JSON file(s). /// /// Provide the path to one or more JSON files that will be parsed and converted. - #[arg(short, long)] - json_files: String, + /// All files are merged into a single RDF graph. + #[arg(short, long, required = true, num_args = 1..)] + json_files: Vec, /// Path to output file. /// @@ -80,7 +81,8 @@ fn main() { json_files, output_file, }) => { - if let Err(e) = json_to_rdf(json_files, namespace, output_file) { + let paths: Vec<&str> = json_files.iter().map(String::as_str).collect(); + if let Err(e) = json_to_rdf(&paths, namespace.as_deref(), output_file.as_deref()) { eprintln!("json2rdf: {}", e); std::process::exit(1); } diff --git a/tests/empty_file.json b/tests/empty_file.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/empty_object.json b/tests/empty_object.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/tests/empty_object.json @@ -0,0 +1 @@ +{} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 80edf36..aa1e8c8 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -10,7 +10,7 @@ use std::fs::{self, File}; #[test] fn test_graph_triple_count() { - let triple_count_string = json_to_rdf(&"tests/airplane.json".to_string(), &None, &None); + let triple_count_string = json_to_rdf(&["tests/airplane.json"], None, None); assert!(triple_count_string.is_ok()); assert_eq!(triple_count_string.unwrap().unwrap().len(), 23); @@ -18,57 +18,49 @@ fn test_graph_triple_count() { #[test] fn test_graph_write() { - let output = "out.nt".to_string(); + let output = "out.nt"; - let res = json_to_rdf( - &"tests/airplane.json".to_string(), - &None, - &Some(output.clone()), - ); + let res = json_to_rdf(&["tests/airplane.json"], None, Some(output)); assert!(res.is_ok()); assert!(res.unwrap().is_none()); - let f = File::open(output.clone()).expect("unable to open output file for result verification"); + let f = File::open(output).expect("unable to open output file for result verification"); let quads = RdfParser::from_format(RdfFormat::NTriples) .for_reader(f) .collect::, _>>() .expect("failed to parse generated output file"); assert_eq!(quads.len(), 23); - let _ = fs::remove_file(output.clone()); + let _ = fs::remove_file(output); } #[test] fn test_graph_write_truncates_existing() { - let output = "out_truncate.nt".to_string(); + let output = "out_truncate.nt"; // Pre-populate with junk to prove truncation happens. - fs::write(&output, "stale garbage\n").expect("unable to seed stale output"); + fs::write(output, "stale garbage\n").expect("unable to seed stale output"); // Two writes in a row should not accumulate; final file should hold one run's worth. for _ in 0..2 { - let res = json_to_rdf( - &"tests/airplane.json".to_string(), - &None, - &Some(output.clone()), - ); + let res = json_to_rdf(&["tests/airplane.json"], None, Some(output)); assert!(res.is_ok()); } - let f = File::open(&output).expect("unable to open output file for result verification"); + let f = File::open(output).expect("unable to open output file for result verification"); let quads = RdfParser::from_format(RdfFormat::NTriples) .for_reader(f) .collect::, _>>() .expect("failed to parse generated output file"); assert_eq!(quads.len(), 23); - let _ = fs::remove_file(&output); + let _ = fs::remove_file(output); } #[test] fn test_root_array() { - let graph = json_to_rdf(&"tests/root_array.json".to_string(), &None, &None) + let graph = json_to_rdf(&["tests/root_array.json"], None, None) .expect("conversion failed") .expect("expected graph"); assert_eq!(graph.len(), 2); @@ -76,7 +68,7 @@ fn test_root_array() { #[test] fn test_root_primitive_errors() { - let result = json_to_rdf(&"tests/root_primitive.json".to_string(), &None, &None); + let result = json_to_rdf(&["tests/root_primitive.json"], None, None); assert!(matches!( result, Err(Json2RdfError::UnsupportedRootValue { kind: "number" }) @@ -85,18 +77,103 @@ fn test_root_primitive_errors() { #[test] fn test_ndjson_stream_isolated() { - let graph = json_to_rdf(&"tests/ndjson.json".to_string(), &None, &None) + let graph = json_to_rdf(&["tests/ndjson.json"], None, None) .expect("conversion failed") .expect("expected graph"); assert_eq!(graph.len(), 2); } +#[test] +fn test_multi_file_merges_graphs() { + // root_array.json → 2 triples; ndjson.json → 2 triples; merged = 4. + let graph = json_to_rdf(&["tests/root_array.json", "tests/ndjson.json"], None, None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 4); +} + +#[test] +fn test_large_integers_preserve_precision() { + let graph = json_to_rdf(&["tests/large_int.json"], None, None) + .expect("conversion failed") + .expect("expected graph"); + let serialized = graph.to_string(); + assert!( + serialized.contains("\"9223372036854775807\"^^"), + "i64::MAX should round-trip as xsd:integer, got:\n{}", + serialized + ); + assert!( + serialized.contains("\"18446744073709551615\"^^"), + "u64::MAX should round-trip as xsd:integer (not xsd:double), got:\n{}", + serialized + ); +} + +#[test] +fn test_empty_file_returns_empty_graph() { + let graph = json_to_rdf(&["tests/empty_file.json"], None, None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 0); +} + +#[test] +fn test_empty_object_at_root_produces_no_triples() { + let graph = json_to_rdf(&["tests/empty_object.json"], None, None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 0); +} + +#[test] +fn test_malformed_json_returns_error() { + let result = json_to_rdf(&["tests/malformed.json"], None, None); + assert!( + matches!(result, Err(Json2RdfError::Json(_))), + "expected Json2RdfError::Json, got {:?}", + result.err() + ); +} + +#[test] +fn test_missing_file_returns_error() { + let result = json_to_rdf(&["tests/does_not_exist.json"], None, None); + assert!( + matches!(result, Err(Json2RdfError::Io(_))), + "expected Json2RdfError::Io, got {:?}", + result.err() + ); +} + +#[test] +fn test_invalid_iri_key_returns_error() { + let result = json_to_rdf(&["tests/invalid_iri_key.json"], None, None); + assert!( + matches!(result, Err(Json2RdfError::InvalidIri { .. })), + "expected Json2RdfError::InvalidIri, got {:?}", + result.err() + ); +} + +#[test] +fn test_unicode_key_in_iri_range() { + let graph = json_to_rdf(&["tests/unicode_key.json"], None, None) + .expect("conversion failed") + .expect("expected graph"); + assert_eq!(graph.len(), 1); + assert!( + graph.to_string().contains("中文"), + "expected unicode predicate in output" + ); +} + #[test] fn test_hash_namespace_not_mangled() { let graph = json_to_rdf( - &"tests/airplane.json".to_string(), - &Some("http://example.com/ns#".to_string()), - &None, + &["tests/airplane.json"], + Some("http://example.com/ns#"), + None, ) .expect("conversion failed") .expect("expected graph"); @@ -117,9 +194,9 @@ fn test_hash_namespace_not_mangled() { #[test] fn test_slash_namespace_no_double_slash() { let graph = json_to_rdf( - &"tests/airplane.json".to_string(), - &Some("http://example.com/ns/".to_string()), - &None, + &["tests/airplane.json"], + Some("http://example.com/ns/"), + None, ) .expect("conversion failed") .expect("expected graph"); diff --git a/tests/invalid_iri_key.json b/tests/invalid_iri_key.json new file mode 100644 index 0000000..78e48d7 --- /dev/null +++ b/tests/invalid_iri_key.json @@ -0,0 +1 @@ +{"bad key": 1} diff --git a/tests/large_int.json b/tests/large_int.json new file mode 100644 index 0000000..edbe91f --- /dev/null +++ b/tests/large_int.json @@ -0,0 +1 @@ +{"big": 9223372036854775807, "bigger": 18446744073709551615} diff --git a/tests/malformed.json b/tests/malformed.json new file mode 100644 index 0000000..6b7a9f4 --- /dev/null +++ b/tests/malformed.json @@ -0,0 +1 @@ +not json diff --git a/tests/unicode_key.json b/tests/unicode_key.json new file mode 100644 index 0000000..e8ec853 --- /dev/null +++ b/tests/unicode_key.json @@ -0,0 +1 @@ +{"中文": 42} From 606de8d71a6e84ad5e72554f9d8e0d7c12aff0d7 Mon Sep 17 00:00:00 2001 From: Gregory Hanson Date: Wed, 15 Apr 2026 14:21:44 -0400 Subject: [PATCH 4/4] bump version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29694f3..2ab1fb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -142,7 +142,7 @@ checksum = "574b0cd5e90ee2ba03a66d0611fc9a09c9a0c28b2ecc2dc8a181dd31a53ca5d7" [[package]] name = "json2rdf" -version = "0.1.1" +version = "0.2.0" dependencies = [ "clap", "oxrdf", diff --git a/Cargo.toml b/Cargo.toml index b4a9c28..05a5699 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json2rdf" -version = "0.1.1" +version = "0.2.0" authors = ["bharath181 ", "Greg Hanson "] edition = "2021" license = "BSD-3-Clause OR Apache-2.0"