Skip to content

Commit 55fb6ab

Browse files
authored
add file I/O tools (#35)
Signed-off-by: SamYuan1990 <yy19902439@126.com>
1 parent 53226c0 commit 55fb6ab

4 files changed

Lines changed: 1138 additions & 0 deletions

File tree

scl/capabilities/fileread.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
"""
2+
File Read Function Call Module
3+
4+
Represents a file reading capability, inheriting from Capability.
5+
Implements the abstract execute method for reading file content with safety checks.
6+
7+
Features and design goals
8+
--------------------------
9+
- Read specific file according to path.
10+
- Path Targeting (defaults to CWD, supports multiple allowed directories)
11+
- Avoid reading binary files (pictures, video, pdf, executables, etc.)
12+
- Returns file content or raises an error describing why the file cannot be read.
13+
- Full OpenTelemetry instrumentation: tracing, metrics, structured logging.
14+
15+
Project Constraints Applied:
16+
----------------------------
17+
- OpenTelemetry integrated for tracing, metrics, and structured logging.
18+
- Logger provides info and debug levels.
19+
"""
20+
21+
import logging
22+
import os
23+
from typing import Optional, Dict, Any, List
24+
25+
from opentelemetry import trace
26+
from scl.otel.otel import tracer, meter
27+
from scl.meta.capability import Capability
28+
29+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Counter instrument for file read executions.
# NOTE: within this module the counter is only incremented on the success
# path of FileRead.execute, tagged with outcome="success".
file_read_counter = meter.create_counter(
    "file_read.executed",
    description="Number of times a file was read (successful or attempted)",
)

# File extensions treated as binary. FileRead.execute lower-cases a path's
# extension and refuses to read it when it appears in this set.
_BINARY_EXTENSIONS = {
    # Raster images
    ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp",
    # Design / diagram formats
    ".psd", ".ai", ".sketch", ".vsd",
    # Office and portable documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".odt", ".ods", ".odp",
    # Executables, shared libraries and compiled artefacts
    ".exe", ".dll", ".so", ".dylib", ".bin", ".class", ".pyc", ".pyo",
    # Archives and disk images
    ".zip", ".gz", ".tar", ".7z", ".rar", ".iso",
    # Audio and video
    ".mp3", ".mp4", ".avi", ".mov", ".mkv",
    # Fonts
    ".ttf", ".otf", ".woff", ".woff2",
}
46+
47+
48+
class FileRead(Capability):
    """
    Concrete implementation of Capability for reading text files.

    Reads are confined to a list of allowed directories. Containment is
    checked on the fully resolved path (``..`` segments and symbolic links
    are resolved first), so neither path traversal nor a symlink can be used
    to read a file that lives outside the allowed directories.

    Parameters
    ----------
    name, description, original_body, llm_description : see Capability
    allowed_directories : Optional[List[str]]
        Directories from which files may be read. Relative paths are
        resolved against these directories. Defaults to [current working directory].
        A single directory given as a plain string (or path-like object) is
        treated as a one-element list.
    """

    @tracer.start_as_current_span("FileRead.__init__")
    def __init__(self,
                 name: str,
                 description: str,
                 original_body: str,
                 llm_description: Optional[str] = None,
                 allowed_directories: Optional[List[str]] = None):
        current_span = trace.get_current_span()
        current_span.set_attribute("file_read.name", name)

        if allowed_directories is None:
            allowed_directories = [os.getcwd()]
        elif isinstance(allowed_directories, (str, os.PathLike)):
            # A bare string would otherwise be iterated character by
            # character, turning "/data" into ["/", "d", "a", ...] and
            # silently whitelisting the filesystem root.
            allowed_directories = [allowed_directories]

        # Ensure all directories are absolute for safe path comparison
        self._allowed_dirs = [os.path.abspath(d) for d in allowed_directories]

        current_span.set_attribute("file_read.allowed_directories", str(self._allowed_dirs))
        logger.debug(f"FileRead '{name}' allowed directories: {self._allowed_dirs}")

        super().__init__(
            name=name,
            type="file_read",
            description=description,
            original_body=original_body,
            llm_description=llm_description,
            function_impl=None  # Concrete tool – no dynamic code
        )

        logger.info(f"FileRead capability '{name}' created")

    @staticmethod
    def _reject(span, message: str) -> ValueError:
        """Log *message*, mark *span* as failed and return a ValueError to raise."""
        logger.error(message)
        span.set_status(trace.Status(trace.StatusCode.ERROR, message))
        return ValueError(message)

    @staticmethod
    def _is_allowed(path: str, allowed_roots: List[str]) -> bool:
        """Return True if *path* equals or lies beneath one of *allowed_roots*.

        Both *path* and every root must already be absolute and resolved.
        The comparison works on whole path components, so ``/database`` is
        not considered to be inside ``/data``.
        """
        for root in allowed_roots:
            try:
                common = os.path.commonpath([root, path])
            except ValueError:
                # Raised e.g. for paths on different Windows drives.
                continue
            if os.path.normcase(common) == os.path.normcase(root):
                return True
        return False

    @tracer.start_as_current_span("FileRead.execute")
    def execute(self, args_dict: Dict[str, Any]) -> str:
        """
        Execute the file read operation.

        Args:
            args_dict: Must contain a "path" key with the file path to read.
                       The path may be absolute or relative to any allowed directory.
                       For a relative path the allowed directories are tried in
                       order and the first existing match is used.

        Returns:
            File content as a string (decoded as UTF-8).

        Raises:
            ValueError: If the path is missing or not a usable string, points
                        outside allowed directories (after resolving ``..`` and
                        symbolic links), does not exist, is not a regular file,
                        has a binary extension, or is not valid UTF-8.
            OSError: On actual file reading errors.
        """
        current_span = trace.get_current_span()
        current_span.set_attribute("file_read.name", self.name)

        # Validate input
        if "path" not in args_dict:
            raise self._reject(current_span, "Missing required argument 'path'")

        raw_path = args_dict["path"]
        if isinstance(raw_path, os.PathLike):
            raw_path = os.fspath(raw_path)
        if not isinstance(raw_path, str) or not raw_path or "\0" in raw_path:
            # Fail with the documented ValueError instead of letting os.path
            # raise TypeError (wrong type) or ValueError (embedded NUL) later.
            raise self._reject(
                current_span,
                f"Argument 'path' must be a non-empty string, got {raw_path!r}"
            )

        current_span.set_attribute("file_read.raw_path", raw_path)
        logger.debug(f"FileRead '{self.name}' requested path: {raw_path}")

        # Resolve symlinks in the allowed directories as well, so that both
        # sides of the containment comparison are in the same canonical form.
        allowed_roots = [os.path.realpath(d) for d in self._allowed_dirs]

        if os.path.isabs(raw_path):
            target_path = os.path.realpath(raw_path)
            if not self._is_allowed(target_path, allowed_roots):
                raise self._reject(
                    current_span,
                    f"Absolute path '{raw_path}' is not within allowed directories: {self._allowed_dirs}"
                )
        else:
            # Search through allowed directories for the first match. A
            # candidate that resolves outside every allowed directory (for
            # example via "../") is skipped and reported as not found, which
            # also avoids revealing whether such an outside file exists.
            target_path = None
            for base_dir in allowed_roots:
                candidate = os.path.realpath(os.path.join(base_dir, raw_path))
                if self._is_allowed(candidate, allowed_roots) and os.path.exists(candidate):
                    target_path = candidate
                    break
            if target_path is None:
                raise self._reject(
                    current_span,
                    f"Relative path '{raw_path}' does not exist in any allowed directory: {self._allowed_dirs}"
                )

        current_span.set_attribute("file_read.resolved_path", target_path)
        logger.info(f"FileRead resolved path: {target_path}")

        # Check existence and type
        if not os.path.exists(target_path):
            raise self._reject(current_span, f"File not found: {target_path}")

        if not os.path.isfile(target_path):
            raise self._reject(current_span, f"Path is not a regular file: {target_path}")

        # Binary file check using extensions (performed on the resolved path,
        # so a text-named symlink to a binary file is refused as well)
        _, ext = os.path.splitext(target_path)
        if ext.lower() in _BINARY_EXTENSIONS:
            raise self._reject(
                current_span,
                f"Binary file extension '{ext}' is not allowed for reading: {target_path}"
            )

        # Attempt to read the content
        try:
            with open(target_path, "r", encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError as ude:
            # Not valid UTF-8: most likely a binary file with an unlisted extension.
            current_span.record_exception(ude)
            raise self._reject(
                current_span,
                f"File could not be decoded as UTF-8: {target_path}"
            ) from ude
        except OSError as ose:
            logger.error(f"OS error reading file {target_path}: {ose}", exc_info=True)
            current_span.record_exception(ose)
            current_span.set_status(trace.Status(trace.StatusCode.ERROR, str(ose)))
            raise

        # Success – update observability
        file_read_counter.add(1, {"file_read.name": self.name, "outcome": "success"})
        current_span.set_attribute("file_read.content_length", len(content))
        # len(content) counts decoded characters, not bytes on disk.
        logger.info(f"FileRead '{self.name}' successfully read {len(content)} characters from {target_path}")
        return content

    def __repr__(self) -> str:
        return (f"FileRead(name='{self.name}', "
                f"allowed_dirs={self._allowed_dirs})")
194+
195+
196+
"""
197+
Example usage:
198+
--------------
199+
from scl.capabilities.fileread import FileRead
200+
201+
# Create a file reader limited to the current working directory
202+
reader = FileRead(
203+
name="local_file",
204+
description="Reads text files from the project directory",
205+
original_body="Read local text files"
206+
)
207+
208+
# Read an existing file
209+
content = reader.execute({"path": "README.md"})
210+
print(content)
211+
212+
# Having multiple allowed directories
213+
wide_reader = FileRead(
214+
name="wide_reader",
215+
description="Can read from /data and /tmp",
216+
original_body="Wide file access",
217+
allowed_directories=["/data", "/tmp"]
218+
)
219+
220+
# Reading from an absolute path (must be inside an allowed directory)
221+
report = wide_reader.execute({"path": "/data/reports/summary.txt"})
222+
print(report[:100])
223+
224+
# This will raise a ValueError because the extension is binary:
225+
# reader.execute({"path": "photo.png"})
226+
"""

0 commit comments

Comments
 (0)