|
18 | 18 | import os |
19 | 19 |
|
20 | 20 | os.environ["OPENAI_API_KEY"] = "Your-OpenAI-API-Key" |
21 | | -epsilla_api_key = os.getenv("EPSILLA_API_KEY", "Your-Epsilla-API-Key") |
22 | | -project_id = os.getenv("EPSILLA_PROJECT_ID", "Your-Project-ID") |
23 | | -db_id = os.getenv("EPSILLA_DB_ID", "Your-DB-ID") |
24 | | -db_sharding_id = os.getenv("EPSILLA_DB_SHARDING_ID", 0) |
25 | 21 |
|
| 22 | +EPSILLA_PROJECT_ID = os.getenv("EPSILLA_PROJECT_ID", "Your-Epsilla-Project-ID") |
| 23 | +EPSILLA_API_KEY = os.getenv("EPSILLA_API_KEY", "Your-Epsilla-API-Key") |
| 24 | +EPSILLA_DB_ID = os.getenv("EPSILLA_DB_ID", "Your-Epsilla-DB-ID") |
| 25 | +EPSILLA_DB_SHARDING_ID = os.getenv("EPSILLA_DB_SHARDING_ID", 0) |
| 26 | + |
| 27 | +TABLE_NAME = os.getenv("TABLE_NAME", "MyTable") |
| 28 | + |
| 29 | +db_name = f"db_{EPSILLA_DB_ID.replace('-', '_')}" |
| 30 | +db_path = f"/data/{EPSILLA_PROJECT_ID}/{db_name}/s{EPSILLA_DB_SHARDING_ID}" |
| 31 | + |
| 32 | + |
| 33 | +from langchain.text_splitter import CharacterTextSplitter |
26 | 34 |
|
27 | 35 | # Step3. Load the documents |
28 | 36 | from langchain_community.document_loaders import WebBaseLoader |
29 | | -from langchain.text_splitter import CharacterTextSplitter |
30 | 37 | from langchain_openai import OpenAIEmbeddings |
31 | 38 |
|
32 | | -loader = WebBaseLoader("https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt") |
| 39 | +loader = WebBaseLoader( |
| 40 | + "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt" |
| 41 | +) |
33 | 42 | documents = loader.load() |
34 | | -documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents) |
| 43 | +documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents( |
| 44 | + documents |
| 45 | +) |
35 | 46 | embeddings = OpenAIEmbeddings() |
36 | 47 |
|
37 | 48 |
|
38 | 49 | # Step4. Load the vector store |
39 | 50 | from langchain_community.vectorstores import Epsilla |
40 | 51 | from pyepsilla import cloud |
41 | 52 |
|
42 | | -db_name = f"db_{db_id.replace('-', '_')}" |
43 | | -db_path = f"/data/{project_id}/{db_name}/s{db_sharding_id}" |
44 | | -table_name = "MyCollection" |
45 | | - |
46 | 53 | # Step4.1 Connect to Epsilla Cloud |
47 | 54 | cloud_client = cloud.Client( |
48 | | - project_id=project_id, |
49 | | - api_key=epsilla_api_key, |
| 55 | + project_id=EPSILLA_PROJECT_ID, |
| 56 | + api_key=EPSILLA_API_KEY, |
50 | 57 | ) |
51 | 58 |
|
52 | 59 | # Step4.2 Connect to Vectordb |
53 | | -db_client = cloud_client.vectordb(db_id) |
| 60 | +db_client = cloud_client.vectordb(EPSILLA_DB_ID) |
54 | 61 |
|
55 | 62 | vector_store = Epsilla.from_documents( |
56 | 63 | documents, |
57 | 64 | embeddings, |
58 | 65 | db_client, |
59 | 66 | db_path=db_path, |
60 | 67 | db_name=db_name, |
61 | | - collection_name=table_name, |
| 68 | + collection_name=TABLE_NAME, |
62 | 69 | ) |
63 | 70 |
|
64 | 71 | # query = "What did the president say about Ketanji Brown Jackson" |
65 | 72 | # docs = vector_store.similarity_search(query) |
66 | 73 | # print(docs[0].page_content) |
67 | 74 |
|
68 | 75 |
|
69 | | - |
70 | 76 | # Step5. Create the QA for Retrieval |
71 | 77 | from langchain.chains import RetrievalQA |
72 | 78 | from langchain_openai import OpenAI |
|
0 commit comments