Skip to content

Commit 4728c14

Browse files
committed
namespace validation, additional JSON validation and tests, fix bug where output file was appended to instead of overwritten
1 parent 477122e commit 4728c14

5 files changed

Lines changed: 158 additions & 61 deletions

File tree

src/lib.rs

Lines changed: 55 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use oxrdf::{BlankNode, Graph, IriParseError, Literal, NamedNodeRef, TripleRef};
2323

2424
use serde_json::{Deserializer, Value};
2525
use std::collections::VecDeque;
26-
use std::fs::{File, OpenOptions};
26+
use std::fs::File;
2727
use std::io::{BufReader, Write};
2828
use thiserror::Error;
2929

@@ -45,6 +45,10 @@ pub enum Json2RdfError {
4545
#[source]
4646
source: IriParseError,
4747
},
48+
49+
/// A root-level JSON value has no predicate context and cannot be converted to a triple.
50+
#[error("unsupported root-level JSON {kind}; root must be an object or array")]
51+
UnsupportedRootValue { kind: &'static str },
4852
}
4953

5054
/// Converts JSON data to RDF format.
@@ -77,11 +81,15 @@ pub fn json_to_rdf(
7781
namespace: &Option<String>,
7882
output_file: &Option<String>,
7983
) -> Result<Option<Graph>, Json2RdfError> {
80-
let rdf_namespace: String = if namespace.is_some() {
84+
let mut prefix: String = if namespace.is_some() {
8185
namespace.clone().unwrap()
8286
} else {
8387
"https://decisym.ai/json2rdf/model".to_owned()
8488
};
89+
// Respect hash (`#`), slash (`/`), and colon (`:`) terminators; otherwise default to `/`.
90+
if !prefix.ends_with(['#', '/', ':']) {
91+
prefix.push('/');
92+
}
8593

8694
let file = File::open(file_path)?;
8795
let reader = BufReader::new(file);
@@ -90,64 +98,57 @@ pub fn json_to_rdf(
9098
let mut graph = Graph::default(); // oxrdf Graph object
9199

92100
let mut subject_stack: VecDeque<BlankNode> = VecDeque::new();
93-
let mut property: Option<String> = None;
94101

95102
for value in stream {
96-
let value = value?;
97-
match value {
98-
Value::Object(obj) => {
99-
let subject = BlankNode::default(); // Create a new blank node
100-
subject_stack.push_back(subject);
101-
102-
for (key, val) in obj {
103-
property = Some(format!("{}/{}", rdf_namespace, key));
104-
process_value(
105-
&mut subject_stack,
106-
&property,
107-
val,
108-
&mut graph,
109-
&rdf_namespace,
110-
)?;
111-
}
112-
113-
subject_stack.pop_back();
114-
}
115-
Value::Array(arr) => {
116-
for val in arr {
117-
process_value(
118-
&mut subject_stack,
119-
&property,
120-
val,
121-
&mut graph,
122-
&rdf_namespace,
123-
)?;
124-
}
125-
}
126-
other => {
127-
process_value(
128-
&mut subject_stack,
129-
&property,
130-
other,
131-
&mut graph,
132-
&rdf_namespace,
133-
)?;
134-
}
135-
}
103+
process_top_level(&mut subject_stack, value?, &mut graph, &prefix)?;
136104
}
137105

138106
if let Some(output_path) = output_file {
139-
let mut file = OpenOptions::new()
140-
.create(true)
141-
.append(true)
142-
.open(output_path)?;
143-
107+
let mut file = File::create(output_path)?;
144108
writeln!(file, "{}", graph)?;
145109
Ok(None)
146110
} else {
147111
Ok(Some(graph))
148112
}
149113
}
150114

115+
/// Processes a single top-level JSON value from the input stream.
116+
///
117+
/// Each top-level value is handled independently: streamed values (NDJSON) do not
118+
/// share predicate state with each other. Root-level primitives have no predicate
119+
/// context and are rejected with [`Json2RdfError::UnsupportedRootValue`].
120+
fn process_top_level(
121+
subject_stack: &mut VecDeque<BlankNode>,
122+
value: Value,
123+
graph: &mut Graph,
124+
prefix: &String,
125+
) -> Result<(), Json2RdfError> {
126+
match value {
127+
Value::Object(obj) => {
128+
let subject = BlankNode::default();
129+
subject_stack.push_back(subject);
130+
131+
for (key, val) in obj {
132+
let property = Some(format!("{}{}", prefix, key));
133+
process_value(subject_stack, &property, val, graph, prefix)?;
134+
}
135+
136+
subject_stack.pop_back();
137+
Ok(())
138+
}
139+
Value::Array(arr) => {
140+
for item in arr {
141+
process_top_level(subject_stack, item, graph, prefix)?;
142+
}
143+
Ok(())
144+
}
145+
Value::Bool(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "boolean" }),
146+
Value::Number(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "number" }),
147+
Value::String(_) => Err(Json2RdfError::UnsupportedRootValue { kind: "string" }),
148+
Value::Null => Err(Json2RdfError::UnsupportedRootValue { kind: "null" }),
149+
}
150+
}
151+
151152
/// This function handles different JSON data types, converting each into RDF triples:
152153
/// - JSON Objects create new blank nodes and recursively process nested values.
153154
/// - JSON Arrays iterate over each element and process it as an individual value.
@@ -165,7 +166,8 @@ pub fn json_to_rdf(
165166
/// - `property`: RDF predicate (property) associated with the JSON value.
166167
/// - `value`: JSON value to process.
167168
/// - `graph`: RDF graph where triples are added.
168-
/// - `namespace`: Namespace for generating predicate URIs.
169+
/// - `prefix`: Fully-prepared namespace prefix (already terminated with `#`, `/`, or `:`)
170+
/// used to build predicate IRIs by direct concatenation with each JSON key.
169171
///
170172
/// # JSON Type to RDF Conversion
171173
/// - **Object**: Creates a blank node and recursively processes key-value pairs.
@@ -178,14 +180,8 @@ fn process_value(
178180
property: &Option<String>,
179181
value: Value,
180182
graph: &mut Graph,
181-
namespace: &String,
183+
prefix: &String,
182184
) -> Result<(), Json2RdfError> {
183-
let ns = if namespace.ends_with("/") {
184-
namespace
185-
} else {
186-
&([namespace, "/"].join(""))
187-
};
188-
189185
let Some(last_subject) = subject_stack.back().cloned() else {
190186
return Ok(());
191187
};
@@ -233,14 +229,14 @@ fn process_value(
233229
subject_stack.push_back(new_subject);
234230

235231
for (key, val) in obj {
236-
let nested_property: Option<String> = Some(format!("{}{}", ns, key));
237-
process_value(subject_stack, &nested_property, val, graph, ns)?;
232+
let nested_property: Option<String> = Some(format!("{}{}", prefix, key));
233+
process_value(subject_stack, &nested_property, val, graph, prefix)?;
238234
}
239235
subject_stack.pop_back();
240236
}
241237
Value::Array(arr) => {
242238
for val in arr {
243-
process_value(subject_stack, property, val, graph, ns)?;
239+
process_value(subject_stack, property, val, graph, prefix)?;
244240
}
245241
}
246242
}

tests/integration_test.rs

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// - BSD 3-Clause License (https://opensource.org/licenses/BSD-3-Clause)
55
// at your option.
66

7-
use json2rdf::json_to_rdf;
7+
use json2rdf::{json_to_rdf, Json2RdfError};
88
use oxrdfio::{RdfFormat, RdfParser};
99
use std::fs::{self, File};
1010

@@ -19,7 +19,6 @@ fn test_graph_triple_count() {
1919
#[test]
2020
fn test_graph_write() {
2121
let output = "out.nt".to_string();
22-
let _ = fs::remove_file(output.clone());
2322

2423
let res = json_to_rdf(
2524
&"tests/airplane.json".to_string(),
@@ -39,3 +38,101 @@ fn test_graph_write() {
3938
assert_eq!(quads.len(), 23);
4039
let _ = fs::remove_file(output.clone());
4140
}
41+
42+
#[test]
43+
fn test_graph_write_truncates_existing() {
44+
let output = "out_truncate.nt".to_string();
45+
46+
// Pre-populate with junk to prove truncation happens.
47+
fs::write(&output, "stale garbage\n").expect("unable to seed stale output");
48+
49+
// Two writes in a row should not accumulate; final file should hold one run's worth.
50+
for _ in 0..2 {
51+
let res = json_to_rdf(
52+
&"tests/airplane.json".to_string(),
53+
&None,
54+
&Some(output.clone()),
55+
);
56+
assert!(res.is_ok());
57+
}
58+
59+
let f = File::open(&output).expect("unable to open output file for result verification");
60+
let quads = RdfParser::from_format(RdfFormat::NTriples)
61+
.for_reader(f)
62+
.collect::<Result<Vec<_>, _>>()
63+
.expect("failed to parse generated output file");
64+
65+
assert_eq!(quads.len(), 23);
66+
let _ = fs::remove_file(&output);
67+
}
68+
69+
#[test]
70+
fn test_root_array() {
71+
let graph = json_to_rdf(&"tests/root_array.json".to_string(), &None, &None)
72+
.expect("conversion failed")
73+
.expect("expected graph");
74+
assert_eq!(graph.len(), 2);
75+
}
76+
77+
#[test]
78+
fn test_root_primitive_errors() {
79+
let result = json_to_rdf(&"tests/root_primitive.json".to_string(), &None, &None);
80+
assert!(matches!(
81+
result,
82+
Err(Json2RdfError::UnsupportedRootValue { kind: "number" })
83+
));
84+
}
85+
86+
#[test]
87+
fn test_ndjson_stream_isolated() {
88+
let graph = json_to_rdf(&"tests/ndjson.json".to_string(), &None, &None)
89+
.expect("conversion failed")
90+
.expect("expected graph");
91+
assert_eq!(graph.len(), 2);
92+
}
93+
94+
#[test]
95+
fn test_hash_namespace_not_mangled() {
96+
let graph = json_to_rdf(
97+
&"tests/airplane.json".to_string(),
98+
&Some("http://example.com/ns#".to_string()),
99+
&None,
100+
)
101+
.expect("conversion failed")
102+
.expect("expected graph");
103+
104+
let serialized = graph.to_string();
105+
assert!(
106+
serialized.contains("<http://example.com/ns#aircraft>"),
107+
"expected predicate to use hash namespace without injected '/', got:\n{}",
108+
serialized
109+
);
110+
assert!(
111+
!serialized.contains("<http://example.com/ns#/"),
112+
"hash namespace should not have '/' appended, got:\n{}",
113+
serialized
114+
);
115+
}
116+
117+
#[test]
118+
fn test_slash_namespace_no_double_slash() {
119+
let graph = json_to_rdf(
120+
&"tests/airplane.json".to_string(),
121+
&Some("http://example.com/ns/".to_string()),
122+
&None,
123+
)
124+
.expect("conversion failed")
125+
.expect("expected graph");
126+
127+
let serialized = graph.to_string();
128+
assert!(
129+
!serialized.contains("<http://example.com/ns//"),
130+
"trailing-slash namespace should not double the slash, got:\n{}",
131+
serialized
132+
);
133+
assert!(
134+
serialized.contains("<http://example.com/ns/aircraft>"),
135+
"expected predicate to use slash namespace without double slash, got:\n{}",
136+
serialized
137+
);
138+
}

tests/ndjson.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"a": 1}
2+
{"b": 2}

tests/root_array.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"a": 1}, {"b": 2}]

tests/root_primitive.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
42

0 commit comments

Comments
 (0)