Skip to content

Commit 39ca2c2

Browse files
authored
Merge branch 'main' into my-feature-branch
2 parents 1b2de64 + a44f907 commit 39ca2c2

21 files changed

Lines changed: 1174 additions & 124 deletions

.github/workflows/pypi-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ name: Upload Python Package
1111
on:
1212
push:
1313
branches: [ "main" ]
14-
paths: ['pyepsilla/vectordb/version.py']
14+
paths: ['pyepsilla/vectordb/version.py', 'pyproject.toml']
1515

1616
jobs:
1717
deploy:

README.md

Lines changed: 120 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,54 @@
1-
# Epsilla Python SDK
1+
<p align="center">
2+
<img width="275" alt="Epsilla Logo" src="https://epsilla-misc.s3.amazonaws.com/epsilla-horizontal.png">
3+
</p>
24

3-
Welcome to Python SDK for Epsilla Vector Database!
4-
https://pypi.org/project/pyepsilla/#history
5+
<p align="center">
6+
<b>Python Client for <a href="https://github.com/epsilla-cloud/vectordb">Epsilla</a> Vector Database</b>
7+
</p>
58

6-
## Installation
7-
```shell
8-
pip3 install pyepsilla
9-
```
10-
or
9+
<hr />
10+
11+
Welcome to the Python SDK for Epsilla Vector Database!
12+
- <a href="https://epsilla-inc.gitbook.io/epsilladb/vector-database/connect-to-a-database">QuickStart</a>
13+
- <a href="https://pypi.org/project/pyepsilla/#history">Release History</a>
14+
15+
## Install pyepsilla
1116
```shell
1217
pip3 install --upgrade pyepsilla
1318
```
1419

15-
## Documentation
20+
## Connect to Epsilla Vector Database
1621

17-
### 1.1 Run epsilla vectordb on localhost
22+
#### Run epsilla vectordb on localhost
1823
```shell
1924
docker pull epsilla/vectordb
2025
docker run -d -p 8888:8888 epsilla/vectordb
2126
```
2227

23-
### 1.2 Use pyepsilla to connect to and interact with local vector database
28+
#### When port 8888 conflicts with Jupyter Notebook
29+
If you are running Jupyter Notebook on localhost, port 8888 may already be in use and conflict with vectordb.
30+
31+
In that case, map vectordb to a different host port, such as 18888:
32+
```
33+
docker run -d -p 18888:8888 epsilla/vectordb
34+
```
35+
36+
#### Use pyepsilla to connect to and interact with local vector database
2437

2538
```python
2639
from pyepsilla import vectordb
2740

28-
## connect to vectordb
41+
## 1.Connect to vectordb
2942
client = vectordb.Client(
3043
host='localhost',
3144
port='8888'
3245
)
3346

34-
## load and use a database
47+
## 2.Load and use a database
3548
client.load_db(db_name="MyDB", db_path="/tmp/epsilla")
3649
client.use_db(db_name="MyDB")
3750

38-
## create a table in the current database
51+
## 3.Create a table in the current database
3952
client.create_table(
4053
table_name="MyTable",
4154
table_fields=[
@@ -45,7 +58,7 @@ client.create_table(
4558
]
4659
)
4760

48-
## insert records
61+
## 4.Insert records
4962
client.insert(
5063
table_name="MyTable",
5164
records=[
@@ -57,7 +70,7 @@ client.insert(
5770
]
5871
)
5972

60-
## search with specific response field
73+
## 5.Search with specific response field
6174
status_code, response = client.query(
6275
table_name="MyTable",
6376
query_field="Embedding",
@@ -67,7 +80,7 @@ status_code, response = client.query(
6780
)
6881
print(response)
6982

70-
## search without specific response field, then it will return all fields
83+
## 6.Search without specifying response fields; all fields are returned
7184
status_code, response = client.query(
7285
table_name="MyTable",
7386
query_field="Embedding",
@@ -76,46 +89,117 @@ status_code, response = client.query(
7689
)
7790
print(response)
7891

79-
80-
81-
## delete records by primary_keys (and filter)
82-
# status_code, response = client.delete(table_name="MyTable", ids=[3])
92+
## 7.Delete records by primary_keys (and filter)
8393
status_code, response = client.delete(table_name="MyTable", primary_keys=[3, 4])
84-
# status_code, response = client.delete(table_name="MyTable", filter="Doc <> 'San Francisco'")
94+
status_code, response = client.delete(table_name="MyTable", filter="Doc <> 'San Francisco'")
8595
print(response)
8696

8797

88-
## drop a table
89-
#client.drop_table("MyTable")
98+
## 8.Drop a table
99+
client.drop_table("MyTable")
90100

91-
## unload a database from memory
92-
#client.unload_db("MyDB")
101+
## 9.Unload a database from memory
102+
client.unload_db("MyDB")
93103
```
94104

95105

106+
## Connect to Epsilla Cloud
96107

97-
### 2 Run epsilla vectordb on epsilla cloud
108+
#### Register and create vectordb on Epsilla Cloud
109+
https://cloud.epsilla.com
98110

111+
#### Use Epsilla Cloud module to connect with the vectordb
112+
First, get the project_id, db_id, and epsilla_api_key from Epsilla Cloud.
99113
```python3
100-
101114
import os

from pyepsilla import cloud
102115

103-
# Connect to Epsilla Cloud
104-
client = cloud.Client(project_id="32ef3a3f-****-****-****-************", api_key="epsilla*****")
116+
epsilla_api_key = os.getenv("EPSILLA_API_KEY", "Your-Epsilla-API-Key")
117+
project_id = os.getenv("EPSILLA_PROJECT_ID", "Your-Project-ID")
118+
db_id = os.getenv("EPSILLA_DB_ID", "Your-DB-ID")
119+
120+
121+
# 1.Connect to Epsilla Cloud
122+
client = cloud.Client(project_id="*****-****-****-****-************", api_key="eps_**********")
123+
124+
# 2.Connect to Vectordb
125+
db = client.vectordb(db_id)
126+
127+
# 3.Create a table with schema
128+
status_code, response = db.create_table(
129+
table_name="MyTable",
130+
table_fields=[
131+
{"name": "ID", "dataType": "INT", "primaryKey": True},
132+
{"name": "Doc", "dataType": "STRING"},
133+
{"name": "Embedding", "dataType": "VECTOR_FLOAT", "dimensions": 4},
134+
],
135+
)
136+
print(status_code, response)
137+
138+
# 4.Insert new vector records into table
139+
status_code, response = db.insert(
140+
table_name="MyTable",
141+
records=[
142+
{"ID": 1, "Doc": "Berlin", "Embedding": [0.05, 0.61, 0.76, 0.74]},
143+
{"ID": 2, "Doc": "London", "Embedding": [0.19, 0.81, 0.75, 0.11]},
144+
{"ID": 3, "Doc": "Moscow", "Embedding": [0.36, 0.55, 0.47, 0.94]},
145+
{"ID": 4, "Doc": "San Francisco", "Embedding": [0.18, 0.01, 0.85, 0.80]},
146+
{"ID": 5, "Doc": "Shanghai", "Embedding": [0.24, 0.18, 0.22, 0.44]},
147+
],
148+
)
149+
print(status_code, response)
150+
151+
152+
# 5.Query vectors with specific response fields; omit response_fields to return all fields
153+
status_code, response = db.query(
154+
table_name="MyTable",
155+
query_field="Embedding",
156+
query_vector=[0.35, 0.55, 0.47, 0.94],
157+
response_fields=["Doc"],
158+
limit=2,
159+
)
160+
print(status_code, response)
161+
162+
163+
# 6.Delete specific records from table
164+
status_code, response = db.delete(table_name="MyTable", primary_keys=[4, 5])
165+
status_code, response = db.delete(table_name="MyTable", filter="Doc <> 'San Francisco'")
166+
print(status_code, response)
167+
168+
# 7.Drop table
169+
status_code, response = db.drop_table(table_name="MyTable")
170+
print(status_code, response)
105171

106-
# Connect to Vectordb
107-
db = client.vectordb(db_id="df7431d0-****-****-****-************")
108172

109173
```
110-
Please check https://github.com/epsilla-cloud/epsilla-python-client/blob/main/examples/hello_epsilla_cloud.py for detail.
111174

112175

176+
## Connect to Epsilla RAG
177+
First, get the project_id, epsilla_api_key, ragapp_id, and conversation_id (optional) from Epsilla Cloud.
178+
The response contains the answer as well as the contexts, e.g. {"answer": "****", "contexts": ['context1', 'context2', ...]}
179+
180+
```python3
181+
from pyepsilla import cloud
182+
183+
# 1.Connect to Epsilla RAG
184+
client = cloud.RAG(
185+
project_id="ce07c6fc-****-****-b7bd-b7819f22bcff",
186+
api_key="eps_**********",
187+
ragapp_id="153a5a49-****-****-b2b8-496451eda8b5",
188+
conversation_id="6fa22a6a-****-****-b1c3-5c795d0f45ef",
189+
)
190+
191+
# 2.Start a new conversation with RAG
192+
client.start_new_conversation()
193+
resp = client.query("What's RAG?")
194+
195+
print("[INFO] response is", resp)
196+
```
113197

114198

115199
## Contributing
116-
Bug reports and pull requests are welcome on GitHub at https://github.com/epsilla-cloud/epsilla-python-client/
200+
Bug reports and pull requests are welcome on [GitHub](https://github.com/epsilla-cloud/epsilla-python-client).
117201

118-
If you have any question or problem, please join our discord https://discord.com/invite/cDaY2CxZc5
202+
If you have any questions or problems, please join our [Discord](https://discord.com/invite/cDaY2CxZc5).
119203

120-
## We love your <a href="https://forms.gle/z73ra1sGBxH9wiUR8">Feedback</a>!
204+
We love your <a href="https://forms.gle/z73ra1sGBxH9wiUR8">Feedback</a>!
121205

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python
2+
# -*- coding:utf-8 -*-
3+
4+
5+
# Question Answering Pipeline with LangChain and Epsilla
6+
# Step1. Install the required packages
7+
"""
8+
pip install langchain
9+
pip install openai
10+
pip install tiktoken
11+
pip install pyepsilla
12+
pip install -U langchain-community
13+
pip install -U langchain-openai
14+
"""
15+
16+
17+
# Step2. Configure the OpenAI API Key
18+
import os
19+
os.environ["OPENAI_API_KEY"] = "Your-OpenAI-API-Key"
20+
21+
22+
# Step3. Load the documents
23+
from langchain.document_loaders import WebBaseLoader
24+
from langchain.text_splitter import CharacterTextSplitter
25+
from langchain_openai import OpenAIEmbeddings
26+
27+
loader = WebBaseLoader("https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt")
28+
documents = loader.load()
29+
documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)
30+
embeddings = OpenAIEmbeddings()
31+
32+
33+
# Step4. Load the vector store
34+
from langchain_community.vectorstores import Epsilla
35+
from pyepsilla import vectordb
36+
37+
db_client = vectordb.Client(protocol="https", host="demo.epsilla.com", port="443")
38+
39+
status_code, response = db_client.load_db("MyDB", "/data/MyDB")
40+
print(status_code, response)
41+
42+
vector_store = Epsilla.from_documents(
43+
documents,
44+
embeddings,
45+
db_client,
46+
db_path="/data/MyDB",
47+
db_name="MyDB",
48+
collection_name="MyCollection",
49+
)
50+
51+
52+
53+
54+
# Step5. Create the QA for Retrieval
55+
from langchain.chains import RetrievalQA
56+
from langchain_openai import OpenAI
57+
58+
qa = RetrievalQA.from_chain_type(
59+
llm=OpenAI(), chain_type="stuff", retriever=vector_store.as_retriever()
60+
)
61+
query = "What did the president say about Ketanji Brown Jackson"
62+
resp = qa.invoke(query)
63+
print("resp:", resp)
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env python
2+
# -*- coding:utf-8 -*-
3+
4+
5+
# Question Answering Pipeline with LangChain and Epsilla
6+
# Step1. Install the required packages
7+
"""
8+
pip install langchain
9+
pip install openai
10+
pip install tiktoken
11+
pip install pyepsilla
12+
pip install -U langchain-openai
13+
pip install -U langchain-community
14+
"""
15+
16+
17+
# Step2. Configure the OpenAI API Key
18+
import os
19+
20+
os.environ["OPENAI_API_KEY"] = "Your-OpenAI-API-Key"
21+
epsilla_api_key = os.getenv("EPSILLA_API_KEY", "Your-Epsilla-API-Key")
22+
project_id = os.getenv("EPSILLA_PROJECT_ID", "Your-Project-ID")
23+
db_id = os.getenv("EPSILLA_DB_ID", "Your-DB-ID")
24+
db_sharding_id = os.getenv("EPSILLA_DB_SHARDING_ID", 0)
25+
26+
27+
# Step3. Load the documents
28+
from langchain.document_loaders import WebBaseLoader
29+
from langchain.text_splitter import CharacterTextSplitter
30+
from langchain_openai import OpenAIEmbeddings
31+
32+
loader = WebBaseLoader("https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt")
33+
documents = loader.load()
34+
documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)
35+
embeddings = OpenAIEmbeddings()
36+
37+
38+
# Step4. Load the vector store
39+
from langchain_community.vectorstores import Epsilla
40+
from pyepsilla import cloud
41+
42+
db_name = f"db_{db_id.replace('-', '_')}"
43+
db_path = f"/data/{project_id}/{db_name}/s{db_sharding_id}"
44+
table_name = "MyCollection"
45+
46+
# Connect to Epsilla Cloud
47+
cloud_client = cloud.Client(
48+
project_id=project_id,
49+
api_key=epsilla_api_key,
50+
)
51+
52+
# Connect to Vectordb
53+
db_client = cloud_client.vectordb(db_id)
54+
55+
vector_store = Epsilla.from_documents(
56+
documents,
57+
embeddings,
58+
db_client,
59+
db_path=db_path,
60+
db_name=db_name,
61+
collection_name=table_name,
62+
)
63+
64+
# query = "What did the president say about Ketanji Brown Jackson"
65+
# docs = vector_store.similarity_search(query)
66+
# print(docs[0].page_content)
67+
68+
69+
70+
# Step5. Create the QA for Retrieval
71+
from langchain.chains import RetrievalQA
72+
from langchain_openai import OpenAI
73+
74+
qa = RetrievalQA.from_chain_type(
75+
llm=OpenAI(), chain_type="stuff", retriever=vector_store.as_retriever()
76+
)
77+
query = "What did the president say about Ketanji Brown Jackson"
78+
resp = qa.invoke(query)
79+
print("resp:", resp)

examples/hello_epsilla.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# client = vectordb.Client(protocol='https', host='demo.epsilla.com', port='443')
1717

1818
# Load DB with path
19-
## pay attention to change db_path to persistent volume for production environment
19+
# Make sure to change db_path to a persistent volume in production environments
2020
status_code, response = client.load_db(db_name="MyDB", db_path="/data/epsilla_demo")
2121
print(response)
2222

0 commit comments

Comments
 (0)