Skip to content

Commit 1fe0efd

Browse files
committed
add Question_Answering_Pipeline_with_LangChain_and_EpsillaCloud
Signed-off-by: eric-epsilla <eric@epsilla.com>
1 parent c91be42 commit 1fe0efd

1 file changed

Lines changed: 81 additions & 0 deletions

File tree

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-


# Question Answering Pipeline with LangChain and Epsilla
# Step1. Install the required packages
"""
pip install langchain
pip install openai
pip install tiktoken
pip install pyepsilla
pip install -U langchain-openai
pip uninstall -y langchain-community
git clone https://github.com/epsilla-cloud/langchain.git
cd ./langchain/libs/community
pip install .
"""


# Step2. Configure the OpenAI API key and the Epsilla Cloud settings
import os

# Keep an OPENAI_API_KEY that is already exported in the environment; only
# fall back to the (empty) placeholder when none is set. Assigning the
# placeholder unconditionally would wipe out a valid key.
os.environ.setdefault("OPENAI_API_KEY", "")
epsilla_api_key = os.getenv("EPSILLA_API_KEY", "")
project_id = os.getenv("EPSILLA_PROJECT_ID", "")
db_id = os.getenv("EPSILLA_DB_ID", "")
# String default so the value is a str whether or not the variable is set;
# it is only interpolated into db_path below.
db_sharding_id = os.getenv("EPSILLA_DB_SHARDING_ID", "0")

# Step3. Load the documents
# NOTE(review): newer LangChain releases appear to expose the loaders and
# vector stores under `langchain_community` -- confirm against the installed
# version before changing these import paths.
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

loader = WebBaseLoader(
    "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt"
)
documents = loader.load()
# Split the loaded text with chunk_size=1000 and chunk_overlap=0.
documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(
    documents
)
embeddings = OpenAIEmbeddings()


# Step4. Load the vector store
from langchain.vectorstores import Epsilla
from pyepsilla import cloud, vectordb


# The database name is derived from the database id, with dashes replaced by
# underscores; the path combines project id, database name and sharding id.
db_name = f"db_{db_id.replace('-', '_')}"
db_path = f"/data/{project_id}/{db_name}/s{db_sharding_id}"
table_name = "MyCollection"

# Connect to Epsilla Cloud
cloud_client = cloud.Client(
    project_id=project_id,
    api_key=epsilla_api_key,
)

# Connect to Vectordb
db_client = cloud_client.vectordb(db_id)

# Build the vector store from the split documents and the embeddings model.
vector_store = Epsilla.from_documents(
    documents,
    embeddings,
    db_client,
    db_path=db_path,
    db_name=db_name,
    collection_name=table_name,
)

# Step5. Create the QA for Retrieval
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=vector_store.as_retriever()
)
query = "What did the president say about Ketanji Brown Jackson"
resp = qa.invoke(query)
print("resp:", resp)

0 commit comments

Comments
 (0)