#!/usr/bin/env python
# -*- coding:utf-8 -*-


# Question Answering Pipeline with LangChain and Epsilla
# Step 1. Install the required packages
"""
pip install langchain
pip install openai
pip install tiktoken
pip install pyepsilla
pip install -U langchain-openai
pip uninstall -y langchain-community
git clone https://github.com/epsilla-cloud/langchain.git
cd ./langchain/libs/community
pip install .
"""


# Step 2. Configure the OpenAI and Epsilla Cloud credentials.
import os

# Use setdefault so an OPENAI_API_KEY already exported by the user is kept.
# (The original hard-coded `os.environ["OPENAI_API_KEY"] = ""`, which
# unconditionally clobbered any real key and made every OpenAI call fail.)
os.environ.setdefault("OPENAI_API_KEY", "")

# Epsilla Cloud connection settings, all read from the environment.
epsilla_api_key = os.getenv("EPSILLA_API_KEY", "")
project_id = os.getenv("EPSILLA_PROJECT_ID", "")
db_id = os.getenv("EPSILLA_DB_ID", "")
# Default as the string "0" rather than the int 0: os.getenv returns the
# default unchanged, so the original yielded an int when unset but a str when
# set. The value is only ever interpolated into a path, so "0" is equivalent.
db_sharding_id = os.getenv("EPSILLA_DB_SHARDING_ID", "0")

# Step 3. Fetch the source document, chunk it, and prepare the embedder.
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

# State of the Union transcript used throughout the LangChain examples.
source_url = (
    "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt"
)
raw_docs = WebBaseLoader(source_url).load()

# Split the page into ~1000-character chunks with no overlap between chunks.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = splitter.split_documents(raw_docs)

# Embedding model; reads OPENAI_API_KEY from the environment.
embeddings = OpenAIEmbeddings()


# Step 4. Connect to Epsilla Cloud and index the chunks in a vector store.
from langchain.vectorstores import Epsilla
from pyepsilla import cloud, vectordb

# Epsilla Cloud lays databases out as /data/<project>/<db_name>/s<shard>;
# db ids use dashes while db names use underscores, hence the replace().
db_name = f"db_{db_id.replace('-', '_')}"
db_path = f"/data/{project_id}/{db_name}/s{db_sharding_id}"
table_name = "MyCollection"

# Authenticate against Epsilla Cloud, then open the target vector database.
cloud_client = cloud.Client(project_id=project_id, api_key=epsilla_api_key)
db_client = cloud_client.vectordb(db_id)

# Embed every chunk and persist it into the collection.
# NOTE(review): db_client is passed positionally here; this relies on the
# forked langchain-community installed in Step 1 accepting it that way —
# upstream LangChain expects it as the `client=` keyword. Confirm against
# the installed fork.
vector_store = Epsilla.from_documents(
    documents,
    embeddings,
    db_client,
    db_path=db_path,
    db_name=db_name,
    collection_name=table_name,
)

# Step 5. Build a retrieval-augmented QA chain over the vector store and run
# a sample question against it.
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

retriever = vector_store.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
)

query = "What did the president say about Ketanji Brown Jackson"
resp = qa.invoke(query)
print("resp:", resp)