-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrdf_reader.rs
More file actions
113 lines (104 loc) · 3.63 KB
/
rdf_reader.rs
File metadata and controls
113 lines (104 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright (c) 2025, Decisym, LLC
// Licensed under the BSD 3-Clause License (see LICENSE file in the project root).
use log::{debug, error, warn};
use oxrdfio::RdfSerializer;
use oxrdfio::{
RdfFormat::{self, NTriples},
RdfParseError, RdfParser,
};
use std::io::Write;
use std::{
error::Error,
io::{BufReader, BufWriter},
path::Path,
};
pub fn convert_to_nt(
file_paths: Vec<String>,
output_file: std::fs::File,
) -> Result<(), Box<dyn Error>> {
let mut dest_writer = BufWriter::new(output_file);
for file in file_paths {
let source = match std::fs::File::open(&file) {
Ok(f) => f,
Err(e) => {
error!("Error opening file {file:?}: {e:?}");
return Err(e.into());
}
};
let source_reader = BufReader::new(source);
debug!("converting {} to nt format", &file);
let mut serializer = RdfSerializer::from_format(NTriples).for_writer(dest_writer.by_ref());
let v = std::time::Instant::now();
let rdf_format = if let Some(t) =
RdfFormat::from_extension(Path::new(&file).extension().unwrap().to_str().unwrap())
{
t
} else {
error!("unrecognized file extension for {file}");
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("unrecognized file extension for {file}"),
)
.into());
};
let quads = RdfParser::from_format(rdf_format)
.with_base_iri(format!(
"file://{}",
Path::new(&file).file_name().unwrap().to_str().unwrap()
))?
.for_reader(source_reader);
let mut warned = false;
for q in quads {
let q = match q {
Ok(v) => v,
Err(e) => {
match e {
RdfParseError::Io(v) => {
// I/O error while reading file
error!("Error reading file {file}: {v}");
return Err(v.into());
}
RdfParseError::Syntax(syn_err) => {
error!("syntax error for RDF file {file}: {syn_err}");
return Err(syn_err.into());
}
}
}
};
if !warned && q.graph_name != oxrdf::GraphName::DefaultGraph {
warned = true;
warn!("HDT does not support named graphs, merging triples for {file}");
}
serializer.serialize_triple(oxrdf::TripleRef {
subject: q.subject.as_ref(),
predicate: q.predicate.as_ref(),
object: q.object.as_ref(),
})?
}
serializer.finish()?;
debug!("RDF to NTriple convert time: {:?}", v.elapsed());
}
dest_writer.flush()?;
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts the Turtle fixture to N-Triples, parses the output back, and checks
    /// that it contains 9 triples.
    #[test]
    fn test_rdf() {
        let tmp_file = tempfile::Builder::new()
            .suffix(".nt")
            .tempfile()
            .expect("error creating tmp file");

        // `expect` (rather than `assert!(.. .is_ok())`) keeps the underlying error
        // in the failure output.
        convert_to_nt(
            vec!["tests/resources/apple.ttl".to_string()],
            tmp_file.reopen().expect("error opening tmp file"),
        )
        .expect("converting apple.ttl to N-Triples failed");

        let source_reader = BufReader::new(tmp_file.reopen().expect("error opening tmp file"));
        let quads = RdfParser::from_format(NTriples)
            .for_reader(source_reader)
            .collect::<Result<Vec<_>, _>>()
            .expect("converted output is not valid N-Triples");
        assert_eq!(quads.len(), 9);
    }
}