Skip to content

Commit 00ea5a8

Browse files
committed
first pass
1 parent 10514d5 commit 00ea5a8

9 files changed

Lines changed: 803 additions & 1 deletion

File tree

Cargo.lock

Lines changed: 511 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "csv2rdf"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
clap = { version = "4.5.21", features = ["derive","cargo"] }
8+
convert_case = "0.6.0"
9+
csv = "1.3.1"
10+
log = "0.4.22"
11+
oxrdf = "0.2.3"
12+
oxrdfio = "0.1.3"

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
# csv2rdf
2-
Library for converting CSV files into NTriple RDF
2+
Library for converting CSV files into RDF
3+
4+
This Rust-based tool converts CSV data into RDF format, utilizing the `oxrdf` crate for RDF graph handling and `csv` for efficient CSV parsing. Generated triples can either be added to an `oxrdf::Graph` or written directly to file.

src/convert.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use std::collections::HashMap;
2+
3+
use crate::writer::RdfWriter;
4+
use convert_case::{Case, Casing};
5+
use csv::ReaderBuilder;
6+
use log::error;
7+
use oxrdf::{NamedNode, TermRef, TripleRef};
8+
9+
/// Namespace prepended to the camel-cased column header to form each predicate IRI.
const C2R: &str = "https://decisym.ai/csv2rdf/model#";
10+
11+
pub fn parse_csv(
12+
files: Vec<String>,
13+
output: &mut dyn RdfWriter,
14+
namespace: &str,
15+
) -> std::io::Result<()> {
16+
// ensure namespace is ready for appending
17+
let ns = if namespace.ends_with("/") {
18+
namespace
19+
} else {
20+
&([namespace, "/"].join(""))
21+
};
22+
23+
for file in files.into_iter() {
24+
let mut rdr = ReaderBuilder::new()
25+
.has_headers(true)
26+
.from_path(file.to_string())?;
27+
28+
let mut headers: HashMap<i32, String> = HashMap::new();
29+
let mut column_index = 0;
30+
match rdr.headers() {
31+
Ok(h) => {
32+
for val in h.iter() {
33+
headers.insert(column_index, val.to_case(Case::Camel));
34+
column_index += 1;
35+
}
36+
}
37+
Err(e) => {
38+
error!("expected first row of CSV data to contain headers");
39+
return Err(e.into());
40+
}
41+
}
42+
43+
for result in rdr.records() {
44+
let record = result.unwrap();
45+
let mut row_id = "".to_string();
46+
column_index = 0;
47+
for field in record.iter() {
48+
if column_index == 0 {
49+
row_id = field.trim().replace(" ", "");
50+
}
51+
// do not append empty cells
52+
else if field != "" {
53+
let column_id = headers.get(&column_index).unwrap();
54+
let subject = NamedNode::new(format!("{}{}", ns, row_id)).unwrap();
55+
let predicate = NamedNode::new(format!("{}{}", C2R, column_id)).unwrap();
56+
let object = TermRef::Literal(field.into());
57+
let csv_triple = TripleRef::new(subject.as_ref(), predicate.as_ref(), object);
58+
output.add_triple(csv_triple)?
59+
}
60+
column_index += 1;
61+
}
62+
}
63+
}
64+
Ok(())
65+
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pub mod convert;
2+
pub mod writer;

src/main.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//! # CSV2RDF Converter
2+
//!
3+
//! This is a Rust-based tool that converts CSV data into RDF format. It uses the `csv` crate
4+
//! for CSV parsing and the `oxrdf` crate to construct RDF triples and graphs.
5+
//!
6+
//! ## Features
7+
//! - Parses CSV input and converts it to RDF triples
8+
//! - Supports specifying a custom namespace for generated RDF nodes
9+
//! - Outputs RDF data to a specified file, an `oxrdf::Graph`, or stdout
10+
//!
11+
//! ## Usage
12+
//! Run the CSV2RDF converter from the command line. For detailed usage information, run:
13+
//! ```sh
14+
//! csv2rdf --help
15+
//! ```
16+
//!
17+
//! ## Example
18+
//! To convert a CSV file to RDF format with a specified namespace and output file:
19+
//! ```sh
20+
//! csv2rdf convert --namespace http://example.com/ns/ --input data.csv --output output.nt
21+
//! ```
22+
//! This will take `data.csv`, apply the specified namespace, and save the RDF output in `output.nt`.
23+
24+
use clap::{Parser, Subcommand};
25+
use csv2rdf::*;
26+
27+
/// Command-line interface for XML2RDF Converter
28+
///
29+
/// This struct defines the command-line interface (CLI) for interacting with the XML2RDF converter.
30+
#[derive(Parser)]
31+
#[command(version, about = "Converts XML data into RDF format.")]
32+
struct Cli {
33+
/// CLI command selection
34+
#[command(subcommand)]
35+
command: Option<Commands>,
36+
}
37+
38+
/// Supported Commands
39+
///
40+
/// Contains the available commands for the XML2RDF converter.
41+
#[derive(Subcommand)]
42+
enum Commands {
43+
/// Convert XML to RDF format.
44+
///
45+
/// The `convert` command parses a XML file, converts it to RDF triples using `xml-rs` for parsing
46+
/// and `oxrdf` to construct the graph, and saves the output.
47+
Convert {
48+
/// Namespace for RDF graph generation.
49+
///
50+
/// A custom namespace to prefix RDF resources created from XML keys and values.
51+
#[arg(short, long, default_value = "https://decisym.ai/csv2rdf/data")]
52+
namespace: String,
53+
54+
/// Path to input XML file(s).
55+
///
56+
/// Provide the path to one or more XML files that will be parsed and converted.
57+
#[arg(short, long, num_args = 1..)]
58+
input: Vec<String>,
59+
60+
/// Path to output file.
61+
///
62+
/// Optional: Specify the path to save the generated RDF data. If not provided, data will be written
63+
/// to stdout
64+
#[arg(short, long)]
65+
output: Option<String>,
66+
},
67+
}
68+
69+
fn main() {
70+
let cli = Cli::parse();
71+
72+
match &cli.command {
73+
Some(Commands::Convert {
74+
namespace,
75+
input,
76+
output,
77+
}) => {
78+
let mut w: Box<dyn writer::RdfWriter> = if let Some(file) = output {
79+
match writer::FileWriter::to_file(file.clone()) {
80+
Err(e) => {
81+
eprintln!("Error opening file for writing: {e}");
82+
return;
83+
}
84+
Ok(v) => Box::new(v),
85+
}
86+
} else {
87+
Box::new(writer::FileWriter::to_stdout())
88+
};
89+
90+
match convert::parse_csv(input.clone(), w.as_mut(), namespace) {
91+
Ok(_) => {}
92+
Err(e) => eprintln!("Error writing: {}", e),
93+
}
94+
}
95+
None => {}
96+
}
97+
}

src/writer.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//! # CSV2RDF Writer Library
2+
//!
3+
//! This library provides functionality for writing converted CSV2RDF data.
4+
//! It uses `oxrdf` to build and manage RDF graphs or output the data directly to a file.
5+
//!
6+
//! ## Overview
7+
//! - Adds CSV-derived RDF triples to a graph or file.
8+
9+
use oxrdf::{Graph, TripleRef};
10+
use std::fs::File;
11+
use std::fs::OpenOptions;
12+
use std::io::{self, BufWriter, Write};
13+
14+
/// Destination for RDF triples; implemented in this file by `FileWriter` and `GraphWriter`.
pub trait RdfWriter {
15+
/// Adds a single triple to the destination.
///
/// Returns an `std::io::Result` so that implementations can report failures.
fn add_triple(&mut self, triple: TripleRef) -> std::io::Result<()>;
16+
}
17+
18+
/// Buffered sink that wraps any `Write` target, such as a file or stdout.
pub struct FileWriter<W: Write> {
    writer: BufWriter<W>,
}

impl FileWriter<io::Stdout> {
    /// Creates a writer that targets the process's standard output.
    pub fn to_stdout() -> Self {
        let stdout = io::stdout();
        Self {
            writer: BufWriter::new(stdout),
        }
    }
}

impl FileWriter<File> {
    /// Opens `output_file` in append mode, creating it if it does not exist.
    ///
    /// # Errors
    /// Returns the underlying I/O error when the file cannot be opened.
    pub fn to_file(output_file: String) -> io::Result<Self> {
        let mut options = OpenOptions::new();
        options.create(true).append(true);
        options.open(output_file).map(|file| Self {
            writer: BufWriter::new(file),
        })
    }
}
41+
42+
impl<W: Write> RdfWriter for FileWriter<W> {
43+
fn add_triple(&mut self, triple: TripleRef) -> std::io::Result<()> {
44+
self.writer.write_all(triple.to_string().as_bytes())?;
45+
self.writer.write_all(b" .\n")?;
46+
let _ = self.writer.flush();
47+
Ok(())
48+
}
49+
}
50+
51+
pub struct GraphWriter<'a> {
52+
graph: &'a mut Graph,
53+
}
54+
55+
impl<'a> GraphWriter<'a> {
56+
pub fn new(graph: &'a mut Graph) -> Self {
57+
Self { graph }
58+
}
59+
}
60+
61+
impl<'a> RdfWriter for GraphWriter<'a> {
62+
fn add_triple(&mut self, triple: TripleRef) -> std::io::Result<()> {
63+
self.graph.insert(triple.clone());
64+
Ok(())
65+
}
66+
}

tests/integration.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use csv2rdf::*;
2+
use oxrdf::Graph;
3+
use oxrdfio::{RdfFormat, RdfParser};
4+
use std::fs;
5+
use std::fs::File;
6+
7+
// Converts the sample CSV into an in-memory graph and checks the triple count.
#[test]
fn test_graph_writer() {
    let mut g = Graph::new();
    let mut w = writer::GraphWriter::new(&mut g);

    // `expect` surfaces the underlying error if the conversion fails.
    convert::parse_csv(
        vec!["tests/resources/people.csv".to_string()],
        &mut w,
        "https://decisym.ai/csv2rdf/data",
    )
    .expect("CSV conversion into a graph failed");

    // 4 data rows x 6 value columns, minus the one empty Hobby cell.
    assert_eq!(g.len(), 23);
}
21+
22+
// Converts the sample CSV into an N-Triples file and checks that it parses back.
#[test]
fn test_file_writer() {
    // Write into the temp directory so the test leaves nothing in the working tree.
    let path = std::env::temp_dir().join(format!("csv2rdf_test_{}.nt", std::process::id()));
    let output = path.to_string_lossy().into_owned();
    // The writer appends, so make sure no stale file exists.
    let _ = fs::remove_file(&path);

    let mut w = writer::FileWriter::to_file(output).expect("Failed to open output file");

    let res = convert::parse_csv(
        vec!["tests/resources/people.csv".to_string()],
        &mut w,
        "https://decisym.ai/csv2rdf/data",
    );
    // Close the writer before the file is read back.
    drop(w);
    res.expect("CSV conversion into a file failed");

    let f = File::open(&path).expect("unable to open output file for result verification");
    let parsed = RdfParser::from_format(RdfFormat::NTriples)
        .for_reader(f)
        .collect::<Result<Vec<_>, _>>();
    // Clean up before asserting so a failing assertion does not leave the file behind.
    let _ = fs::remove_file(&path);
    let quads = parsed.expect("failed to parse generated output file");

    // 4 data rows x 6 value columns, minus the one empty Hobby cell.
    assert_eq!(quads.len(), 23);
}

tests/resources/people.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Name,Address,Place,Country,Age,Hobby,Favourite Colour
2+
John,Dam 52,Amsterdam,The Netherlands,32,Fishing,Blue
3+
Jenny,Leidseplein 2,Amsterdam,The Netherlands,12,,Mauve
4+
Jill,52W Street 5,Amsterdam,United States of America,28,Carpentry,Cyan
5+
Jake,12E Street 98,Amsterdam,United States of America,42,Ballet,Purple

0 commit comments

Comments
 (0)