-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathflow.py
More file actions
30 lines (26 loc) · 1.05 KB
/
flow.py
File metadata and controls
30 lines (26 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from pocketflow import Flow
from nodes import (
DuplicateDetectionNode,
TableSummaryNode,
ColumnDescriptionNode,
DataTypeAnalysisNode,
MissingValuesAnalysisNode,
UniquenessAnalysisNode,
UnusualValuesDetectionNode,
GenerateReportNode
)
def create_data_profiling_flow():
    """Build the data profiling flow and hand it back to the caller.

    One instance of each profiling node is created, the instances are
    linked in a single linear chain with the ``>>`` operator, and the
    chain is wrapped in a ``Flow`` whose start node is the duplicate
    detection node.

    Returns:
        The ``Flow`` object constructed with the duplicate detection node
        as its ``start`` argument.
    """
    # Pipeline stages, listed in the order they are chained together.
    # Instances are created in this same order.
    stages = (
        DuplicateDetectionNode(),
        TableSummaryNode(),
        ColumnDescriptionNode(),
        DataTypeAnalysisNode(),
        MissingValuesAnalysisNode(),
        UniquenessAnalysisNode(),
        UnusualValuesDetectionNode(),
        GenerateReportNode(),
    )

    # Fold ``>>`` left to right: each step links the current tail to the
    # next stage and continues from whatever ``>>`` evaluates to, which is
    # exactly how the chained form ``a >> b >> c`` is evaluated.
    tail = stages[0]
    for following in stages[1:]:
        tail = tail >> following

    # The flow is entered at the first stage (duplicate detection).
    return Flow(start=stages[0])