Skip to content

Commit 17416bf

Browse files
authored
Cleanup example metadata parsing utilities (#20251) (#20252)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #20251. ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent 6b6f64d commit 17416bf

6 files changed

Lines changed: 56 additions & 36 deletions

File tree

datafusion-examples/src/utils/csv_to_parquet.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
use std::path::{Path, PathBuf};
1919

2020
use datafusion::dataframe::DataFrameWriteOptions;
21-
use datafusion::error::Result;
21+
use datafusion::error::{DataFusionError, Result};
2222
use datafusion::prelude::{CsvReadOptions, SessionContext};
23-
use datafusion_common::DataFusionError;
2423
use tempfile::TempDir;
2524
use tokio::fs::create_dir_all;
2625

datafusion-examples/src/utils/datasets/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
use std::path::PathBuf;
1919

2020
use arrow_schema::SchemaRef;
21-
use datafusion::error::Result;
22-
use datafusion_common::DataFusionError;
21+
use datafusion::error::{DataFusionError, Result};
2322

2423
pub mod cars;
2524
pub mod regex;
@@ -50,10 +49,11 @@ impl ExampleDataset {
5049
}
5150

5251
pub fn path_str(&self) -> Result<String> {
53-
self.path().to_str().map(String::from).ok_or_else(|| {
52+
let path = self.path();
53+
path.to_str().map(String::from).ok_or_else(|| {
5454
DataFusionError::Execution(format!(
5555
"CSV directory path is not valid UTF-8: {}",
56-
self.path().display()
56+
path.display()
5757
))
5858
})
5959
}

datafusion-examples/src/utils/example_metadata/discover.rs

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020
//! An example group is defined as a directory containing a `main.rs` file
2121
//! under the examples root. This module is intentionally filesystem-focused
2222
//! and does not perform any parsing or rendering.
23+
//! Discovery fails if no valid example groups are found.
2324
2425
use std::fs;
2526
use std::path::{Path, PathBuf};
2627

28+
use datafusion::common::exec_err;
2729
use datafusion::error::Result;
2830

2931
/// Discovers all example group directories under the given root.
@@ -35,10 +37,15 @@ pub fn discover_example_groups(root: &Path) -> Result<Vec<PathBuf>> {
3537
let entry = entry?;
3638
let path = entry.path();
3739

38-
if path.is_dir() && path.join("main.rs").exists() {
40+
if path.is_dir() && path.join("main.rs").is_file() {
3941
groups.push(path);
4042
}
4143
}
44+
45+
if groups.is_empty() {
46+
return exec_err!("No example groups found under: {}", root.display());
47+
}
48+
4249
groups.sort();
4350
Ok(groups)
4451
}
@@ -47,6 +54,8 @@ pub fn discover_example_groups(root: &Path) -> Result<Vec<PathBuf>> {
4754
mod tests {
4855
use super::*;
4956

57+
use crate::utils::example_metadata::test_utils::assert_exec_err_contains;
58+
5059
use std::fs::{self, File};
5160

5261
use tempfile::TempDir;
@@ -66,10 +75,29 @@ mod tests {
6675
fs::create_dir(&group2)?;
6776

6877
let groups = discover_example_groups(root)?;
69-
7078
assert_eq!(groups.len(), 1);
7179
assert_eq!(groups[0], group1);
80+
Ok(())
81+
}
82+
83+
#[test]
84+
fn discover_example_groups_errors_if_main_rs_is_a_directory() -> Result<()> {
85+
let tmp = TempDir::new()?;
86+
let root = tmp.path();
87+
let group = root.join("group");
88+
fs::create_dir(&group)?;
89+
fs::create_dir(group.join("main.rs"))?;
90+
91+
let err = discover_example_groups(root).unwrap_err();
92+
assert_exec_err_contains(err, "No example groups found");
93+
Ok(())
94+
}
7295

96+
#[test]
97+
fn discover_example_groups_errors_if_none_found() -> Result<()> {
98+
let tmp = TempDir::new()?;
99+
let err = discover_example_groups(tmp.path()).unwrap_err();
100+
assert_exec_err_contains(err, "No example groups found");
73101
Ok(())
74102
}
75103
}

datafusion-examples/src/utils/example_metadata/model.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,16 @@ use std::path::Path;
2525

2626
use datafusion::error::{DataFusionError, Result};
2727

28-
use crate::utils::example_metadata::{parse_main_rs_docs, render::ABBREVIATIONS};
28+
use crate::utils::example_metadata::parse_main_rs_docs;
29+
30+
/// Well-known abbreviations used to preserve correct capitalization
31+
/// when generating human-readable documentation titles.
32+
const ABBREVIATIONS: &[(&str, &str)] = &[
33+
("dataframe", "DataFrame"),
34+
("io", "IO"),
35+
("sql", "SQL"),
36+
("udf", "UDF"),
37+
];
2938

3039
/// A group of related examples (e.g. `builtin_functions`, `udf`).
3140
///

datafusion-examples/src/utils/example_metadata/parser.rs

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,16 @@
2121
//! and their associated metadata (file name and description), enforcing
2222
//! a strict ordering and structure to avoid ambiguous documentation.
2323
24-
use std::path::Path;
25-
use std::{collections::HashSet, fs};
24+
use std::{collections::HashSet, fs, path::Path};
2625

27-
use datafusion_common::{DataFusionError, Result};
26+
use datafusion::common::exec_err;
27+
use datafusion::error::Result;
2828
use nom::{
29-
IResult, Parser,
29+
Err, IResult, Parser,
3030
bytes::complete::{tag, take_until, take_while},
3131
character::complete::multispace0,
3232
combinator::all_consuming,
33+
error::{Error, ErrorKind},
3334
sequence::{delimited, preceded},
3435
};
3536

@@ -77,19 +78,13 @@ fn parse_metadata_line(input: &str) -> IResult<&str, (&str, &str)> {
7778
let content = payload
7879
.strip_prefix("(")
7980
.and_then(|s| s.strip_suffix(")"))
80-
.ok_or_else(|| {
81-
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
82-
})?;
81+
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
8382

8483
let (file, desc) = content
8584
.strip_prefix("file:")
86-
.ok_or_else(|| {
87-
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
88-
})?
85+
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?
8986
.split_once(", desc:")
90-
.ok_or_else(|| {
91-
nom::Err::Error(nom::error::Error::new(payload, nom::error::ErrorKind::Tag))
92-
})?;
87+
.ok_or_else(|| Err::Error(Error::new(payload, ErrorKind::Tag)))?;
9388

9489
Ok((rest, (file.trim(), desc.trim())))
9590
}
@@ -119,18 +114,16 @@ pub fn parse_main_rs_docs(path: &Path) -> Result<Vec<ExampleEntry>> {
119114
let subcommand = match state {
120115
ParserState::SeenSubcommand(s) => s,
121116
ParserState::Idle => {
122-
return Err(DataFusionError::Execution(format!(
117+
return exec_err!(
123118
"Metadata without preceding subcommand at {}:{}",
124119
path.display(),
125120
line_no + 1
126-
)));
121+
);
127122
}
128123
};
129124

130125
if !seen_subcommands.insert(subcommand) {
131-
return Err(DataFusionError::Execution(format!(
132-
"Duplicate metadata for subcommand `{subcommand}`"
133-
)));
126+
return exec_err!("Duplicate metadata for subcommand `{subcommand}`");
134127
}
135128

136129
entries.push(ExampleEntry {

datafusion-examples/src/utils/example_metadata/render.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,6 @@ cargo run --example dataframe -- dataframe
8585
```
8686
"#;
8787

88-
/// Well-known abbreviations used to preserve correct capitalization
89-
/// when generating human-readable documentation titles.
90-
pub const ABBREVIATIONS: &[(&str, &str)] = &[
91-
("dataframe", "DataFrame"),
92-
("io", "IO"),
93-
("sql", "SQL"),
94-
("udf", "UDF"),
95-
];
96-
9788
/// Generates Markdown documentation for DataFusion examples.
9889
///
9990
/// If `group` is `None`, documentation is generated for all example groups.

0 commit comments

Comments
 (0)