1+ """
2+ File Read Function Call Module
3+
4+ Represents a file reading capability, inheriting from Capability.
5+ Implements the abstract execute method for reading file content with safety checks.
6+
7+ Features and design goals
8+ --------------------------
9+ - Read specific file according to path.
10+ - Path Targeting (defaults to CWD, supports multiple allowed directories)
11+ - Avoid reading binary files (pictures, video, pdf, executables, etc.)
12+ - Returns file content or raises an error describing why the file cannot be read.
13+ - Full OpenTelemetry instrumentation: tracing, metrics, structured logging.
14+
15+ Project Constraints Applied:
16+ ----------------------------
17+ - OpenTelemetry integrated for tracing, metrics, and structured logging.
18+ - Logger provides info and debug levels.
19+ """
20+
21+ import logging
22+ import os
23+ from typing import Optional , Dict , Any , List
24+
25+ from opentelemetry import trace
26+ from scl .otel .otel import tracer , meter
27+ from scl .meta .capability import Capability
28+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# OpenTelemetry counter for file read executions; FileRead.execute adds to it
# with a "file_read.name" and an "outcome" attribute.
file_read_counter = meter.create_counter(
    "file_read.executed",
    description="Number of times a file was read (successful or attempted)",
)
36+
# Known binary file extensions (lower-case, including the leading dot).
# Files whose extension is listed here are refused before any read is attempted.
# A frozenset keeps this shared module-level deny-list immutable at runtime.
_BINARY_EXTENSIONS = frozenset({
    # Images
    ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp",
    # Documents and office formats
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
    ".odt", ".ods", ".odp", ".vsd",
    # Executables, libraries and raw binaries
    ".exe", ".dll", ".so", ".dylib", ".bin",
    # Archives and disk images
    ".zip", ".gz", ".tar", ".7z", ".rar", ".iso",
    # Audio and video
    ".mp3", ".mp4", ".avi", ".mov", ".mkv",
    # Design files
    ".psd", ".ai", ".sketch",
    # Fonts
    ".ttf", ".otf", ".woff", ".woff2",
    # Compiled bytecode
    ".class", ".pyc", ".pyo",
})
46+
47+
class FileRead(Capability):
    """
    Concrete implementation of Capability for reading text files.

    Reads are confined to a configured set of directories. Containment is
    decided on symlink-resolved paths, compared component-wise, so neither
    ``..`` segments, sibling directories sharing a name prefix
    (``/data`` vs ``/data_secret``), nor symlinks can be used to reach a file
    outside the allowed directories.

    Parameters
    ----------
    name, description, original_body, llm_description : see Capability
    allowed_directories : Optional[List[str]]
        Directories from which files may be read. Relative paths are
        resolved against these directories. Defaults to [current working directory].
    """

    @tracer.start_as_current_span("FileRead.__init__")
    def __init__(self,
                 name: str,
                 description: str,
                 original_body: str,
                 llm_description: Optional[str] = None,
                 allowed_directories: Optional[List[str]] = None):
        current_span = trace.get_current_span()
        current_span.set_attribute("file_read.name", name)

        if allowed_directories is None:
            allowed_directories = [os.getcwd()]
        # Ensure all directories are absolute for safe path comparison
        self._allowed_dirs = [os.path.abspath(d) for d in allowed_directories]

        current_span.set_attribute("file_read.allowed_directories", str(self._allowed_dirs))
        logger.debug("FileRead '%s' allowed directories: %s", name, self._allowed_dirs)

        super().__init__(
            name=name,
            type="file_read",
            description=description,
            original_body=original_body,
            llm_description=llm_description,
            function_impl=None,  # Concrete tool – no dynamic code
        )

        logger.info("FileRead capability '%s' created", name)

    def _confine(self, path: str) -> Optional[str]:
        """
        Return the symlink-resolved form of *path* if it lies inside an
        allowed directory (or is one), otherwise ``None``.
        """
        real_path = os.path.realpath(path)
        comparable = os.path.normcase(real_path)
        for allowed in self._allowed_dirs:
            root = os.path.normcase(os.path.realpath(allowed))
            try:
                # commonpath compares whole path components, unlike
                # str.startswith which would accept "/data_secret" for "/data".
                if os.path.commonpath([comparable, root]) == root:
                    return real_path
            except ValueError:
                # Raised e.g. for paths on different Windows drives: not contained.
                continue
        return None

    def _failure(self, span, message: str,
                 cause: Optional[BaseException] = None) -> ValueError:
        """
        Record a failed read (log, span status, metric) and return the
        ValueError for the caller to raise.
        """
        logger.error(message)
        if cause is not None:
            span.record_exception(cause)
        span.set_status(trace.Status(trace.StatusCode.ERROR, message))
        file_read_counter.add(1, {"file_read.name": self.name, "outcome": "error"})
        return ValueError(message)

    @tracer.start_as_current_span("FileRead.execute")
    def execute(self, args_dict: Dict[str, Any]) -> str:
        """
        Execute the file read operation.

        Args:
            args_dict: Must contain a "path" key with the file path to read.
                       The path may be absolute or relative to any allowed directory.
                       Relative paths are tried against each allowed directory
                       in order; the first existing match is used.

        Returns:
            File content as a string (decoded as UTF-8).

        Raises:
            ValueError: If the path is missing or not a non-empty string, points
                        outside allowed directories, does not exist, is not a
                        regular file, has a binary extension, or is not valid UTF-8.
            OSError: On actual file reading errors.
        """
        current_span = trace.get_current_span()
        current_span.set_attribute("file_read.name", self.name)

        # Validate input
        if "path" not in args_dict:
            raise self._failure(current_span, "Missing required argument 'path'")

        raw_path = args_dict["path"]
        if not isinstance(raw_path, str) or not raw_path or "\x00" in raw_path:
            raise self._failure(
                current_span,
                f"Argument 'path' must be a non-empty string without NUL bytes, got {raw_path!r}",
            )
        current_span.set_attribute("file_read.raw_path", raw_path)
        logger.debug("FileRead '%s' requested path: %s", self.name, raw_path)

        # Resolve to a real path that is guaranteed to sit inside an allowed directory
        if os.path.isabs(raw_path):
            target_path = self._confine(raw_path)
            if target_path is None:
                raise self._failure(
                    current_span,
                    f"Absolute path '{raw_path}' is not within allowed directories: {self._allowed_dirs}",
                )
        else:
            # Search through allowed directories for the first match. Candidates
            # that escape the allowed directories (e.g. via "..") are skipped.
            target_path = None
            for base_dir in self._allowed_dirs:
                confined = self._confine(os.path.join(base_dir, raw_path))
                if confined is not None and os.path.exists(confined):
                    target_path = confined
                    break
            if target_path is None:
                raise self._failure(
                    current_span,
                    f"Relative path '{raw_path}' does not exist in any allowed directory: {self._allowed_dirs}",
                )

        current_span.set_attribute("file_read.resolved_path", target_path)
        logger.info("FileRead resolved path: %s", target_path)

        # Check existence and type
        if not os.path.exists(target_path):
            raise self._failure(current_span, f"File not found: {target_path}")

        if not os.path.isfile(target_path):
            raise self._failure(current_span, f"Path is not a regular file: {target_path}")

        # Binary file check using extensions (performed on the resolved path, so a
        # text-looking symlink to a binary file is refused as well)
        _, ext = os.path.splitext(target_path)
        if ext.lower() in _BINARY_EXTENSIONS:
            raise self._failure(
                current_span,
                f"Binary file extension '{ext}' is not allowed for reading: {target_path}",
            )

        # Attempt to read the content
        try:
            with open(target_path, "r", encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError as ude:
            raise self._failure(
                current_span,
                f"File could not be decoded as UTF-8: {target_path}",
                cause=ude,
            ) from ude
        except OSError as ose:
            logger.error("OS error reading file %s: %s", target_path, ose, exc_info=True)
            current_span.record_exception(ose)
            current_span.set_status(trace.Status(trace.StatusCode.ERROR, str(ose)))
            file_read_counter.add(1, {"file_read.name": self.name, "outcome": "error"})
            raise

        # Success – update observability
        file_read_counter.add(1, {"file_read.name": self.name, "outcome": "success"})
        current_span.set_attribute("file_read.content_length", len(content))
        # len(content) counts decoded characters, not bytes on disk.
        logger.info(
            "FileRead '%s' successfully read %d characters from %s",
            self.name, len(content), target_path,
        )
        return content

    def __repr__(self) -> str:
        return (f"FileRead(name='{self.name}', "
                f"allowed_dirs={self._allowed_dirs})")
194+
195+
196+ """
197+ Example usage:
198+ --------------
199+ from scl.capabilities.file_read import FileRead
200+
201+ # Create a file reader limited to the current working directory
202+ reader = FileRead(
203+ name="local_file",
204+ description="Reads text files from the project directory",
205+ original_body="Read local text files"
206+ )
207+
208+ # Read an existing file
209+ content = reader.execute({"path": "README.md"})
210+ print(content)
211+
212+ # Having multiple allowed directories
213+ wide_reader = FileRead(
214+ name="wide_reader",
215+ description="Can read from /data and /tmp",
216+ original_body="Wide file access",
217+ allowed_directories=["/data", "/tmp"]
218+ )
219+
220+ # Reading from an absolute path (must be inside an allowed directory)
221+ report = wide_reader.execute({"path": "/data/reports/summary.txt"})
222+ print(report[:100])
223+
224+ # This will raise a ValueError because the extension is binary:
225+ # reader.execute({"path": "photo.png"})
226+ """
0 commit comments