|
# 3. wget http://ann-benchmarks.com/gist-960-euclidean.hdf5
# 4. python3 gist-960-euclidean.py

import datetime
import os
from urllib.parse import urlparse

import h5py
from pyepsilla import vectordb

# Connect to Epsilla vector database
client = vectordb.Client(host="127.0.0.1", port="8888")
client.load_db(
    db_name="benchmark", db_path="/tmp/epsilla", vector_scale=1000000, wal_enabled=False
)  # pay attention to change db_path to persistent volume for production environment
client.use_db(db_name="benchmark")

# Check gist-960-euclidean dataset hdf5 file to download or not
dataset_download_url = "http://ann-benchmarks.com/gist-960-euclidean.hdf5"
dataset_filename = os.path.basename(urlparse(dataset_download_url).path)
if not os.path.isfile(dataset_filename):
    os.system("wget --no-check-certificate {}".format(dataset_download_url))

# Read gist-960-euclidean data from hdf5
f = h5py.File("gist-960-euclidean.hdf5", "r")
print(list(f.keys()))
training_data = f["train"]
size = training_data.size
records_num, dimensions = training_data.shape

# Create table for gist-960-euclidean
id_field = {"name": "id", "dataType": "INT", "primaryKey": True}
vec_field = {"name": "vector", "dataType": "VECTOR_FLOAT", "dimensions": dimensions}
fields = [id_field, vec_field]
status_code, response = client.create_table(table_name="benchmark", table_fields=fields)

# Insert the first 10000 records into the table as a warm-up batch
records_data = [{"id": i, "vector": training_data[i].tolist()} for i in range(10000)]
client.insert(table_name="benchmark", records=records_data)

# Insert all data into table, in batches of 50000 records.
# The +10000 padding guarantees the final (partial) batch boundary is included.
batch_bounds = [i for i in range(0, records_num + 10000, 50000)]
print("Begin to insert all gist data into table ...")
for batch_idx in range(len(batch_bounds) - 1):
    print("-" * 20)
    start = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    # Clamp the upper bound so the last batch never indexes past the dataset end.
    lo = batch_bounds[batch_idx]
    hi = min(batch_bounds[batch_idx + 1], records_num)
    print(lo, hi)
    records_data = [
        {"id": i, "vector": training_data[i].tolist()} for i in range(lo, hi)
    ]
    client.insert(table_name="benchmark", records=records_data)
    end = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    print("START:", start, "\nEND :", end)


# Delete some data by ids
# client.delete(table_name="benchmark", ids=[300033, 600066])
client.delete(table_name="benchmark", ids=[9999])


# Rebuild ann graph, it will wait until rebuild is finished, wait time is depended on the amount of dataset
client.rebuild()

# Query vectors
query_field = "vector"
query_vector = training_data[40000].tolist()
response_fields = ["id"]
limit = 2

status_code, response = client.query(
    table_name="benchmark",
    query_field=query_field,
    query_vector=query_vector,
    response_fields=response_fields,
    limit=limit,
    with_distance=True,
)
print("Response:", response)


# Get
status_code, body = client.get(table_name="benchmark")
print("Status Code:", status_code)
print("Size of result gotten", len(body["result"]))
|
0 commit comments