-
Notifications
You must be signed in to change notification settings - Fork 34
Expand file tree
/
Copy pathmain.py
More file actions
106 lines (90 loc) · 3.64 KB
/
main.py
File metadata and controls
106 lines (90 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import sys
from urllib.parse import urlparse
from flow import create_support_bot_flow
def get_domain_from_url(url):
"""Extract domain from URL for allowed_domains"""
parsed = urlparse(url)
return parsed.netloc.lower()
def main():
# Get command line arguments
if len(sys.argv) < 3:
print("Usage: python main.py <start_url1> [start_url2] ... '<initial_question>' [instruction]")
print("Example: python main.py https://example.com 'What is your return policy?'")
sys.exit(1)
# Argument parsing logic...
if len(sys.argv) >= 4 and not sys.argv[-1].startswith(('http://', 'https://')):
start_urls = sys.argv[1:-2]
initial_question = sys.argv[-2]
instruction = sys.argv[-1]
else:
start_urls = sys.argv[1:-1]
initial_question = sys.argv[-1]
instruction = "Provide helpful and accurate answers based on the website content."
# Validate URLs
for url in start_urls:
if not url.startswith(('http://', 'https://')):
print(f"Error: '{url}' is not a valid URL. URLs must start with http:// or https://")
sys.exit(1)
domains = [d for d in (get_domain_from_url(url) for url in start_urls) if d]
# Initialize shared store. This state will be preserved across conversations.
shared = {
"conversation_history": [],
"instruction": instruction,
"allowed_domains": list(set(domains)),
"max_iterations": 5,
"max_pages": 100,
"content_max_chars": 100000,
"max_urls_per_iteration": 10,
# URL tracking state
"all_discovered_urls": start_urls.copy(),
"visited_urls": set(),
"url_content": {},
"url_graph": {},
# Per-run state (will be set in the loop)
"user_question": "",
"urls_to_process": [],
"current_iteration": 0,
"final_answer": None
}
support_bot_flow = create_support_bot_flow()
# --- Conversational Loop ---
is_first_run = True
while True:
if is_first_run:
shared["user_question"] = initial_question
# For the first run, process the starting URLs
shared["urls_to_process"] = list(range(len(start_urls)))
is_first_run = False
else:
try:
follow_up_question = input("\nAsk a follow-up question (or press Ctrl+C to exit): ")
if not follow_up_question.strip():
continue
shared["user_question"] = follow_up_question
# For subsequent runs, the agent must decide to explore
shared["urls_to_process"] = []
except (EOFError, KeyboardInterrupt):
print("\n\nExiting.")
break
print(f"\n=== Answering: '{shared['user_question']}' ===")
print("=" * 50)
# Reset per-run state
shared["current_iteration"] = 0
shared["final_answer"] = None
support_bot_flow.run(shared)
print("\n" + "=" * 50)
if shared["final_answer"]:
print("Final Answer:")
print(shared["final_answer"])
# Add to conversation history
shared["conversation_history"].append({
"user": shared["user_question"],
"bot": shared["final_answer"]
})
else:
print("No final answer was generated.")
print(f"\nExploration Summary:")
print(f"- Visited {len(shared['visited_urls'])} pages")
print(f"- Discovered {len(shared['all_discovered_urls'])} total URLs")
if __name__ == "__main__":
main()