summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--notebooks/testing.ipynb138
1 files changed, 90 insertions, 48 deletions
diff --git a/notebooks/testing.ipynb b/notebooks/testing.ipynb
index de5e76e..4d64cf2 100644
--- a/notebooks/testing.ipynb
+++ b/notebooks/testing.ipynb
@@ -66,57 +66,83 @@
{
"cell_type": "code",
"execution_count": 6,
- "id": "b845bb31-0909-42cb-9957-9a8b3bb0b5c4",
+ "id": "aa279b1a-465e-4820-ab56-b25fc513c0a1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emb_db = Embeddings()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1dc655de-2359-42ce-b705-76ec06c5f72f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "485\n"
+ ]
+ }
+ ],
+ "source": [
+ "emb_db.add(embs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "57173d80-9519-479e-9cd9-ba7ccdae7d6b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(1024,)"
+ "CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=485, indexed_vectors_count=0, points_count=485, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), payload_schema={})"
]
},
- "execution_count": 6,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "embs[0].shape"
+ "emb_db.client.get_collection(collection_name=\"knowledge-base\")"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "aa279b1a-465e-4820-ab56-b25fc513c0a1",
+ "execution_count": null,
+ "id": "56c9df8a-cbf6-4051-8f4b-cb1eb89a536e",
"metadata": {},
"outputs": [],
"source": [
- "emb_db = Embeddings()"
+ "embs[125]"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "1dc655de-2359-42ce-b705-76ec06c5f72f",
+ "execution_count": null,
+ "id": "117d3416-e79f-436f-a33e-ffb45b972b72",
"metadata": {},
"outputs": [],
"source": [
- "emb_db.add(embs)"
+ "q = \"non-parametric least-square\\nestimation and the parametric MLE under Gaussian assumption?\""
]
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "117d3416-e79f-436f-a33e-ffb45b972b72",
+ "execution_count": null,
+ "id": "b31f0362-6def-4e50-a31c-8b7e2995c62b",
"metadata": {},
"outputs": [],
- "source": [
- "q = \"the variance of the portfolio\""
- ]
+ "source": []
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "3a6ef474-678c-4525-8dcb-ece67aa9c7ea",
"metadata": {},
"outputs": [],
@@ -126,31 +152,37 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "4c8a16ba-6025-4a6e-95c2-bbba7a9a5de5",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(array([[122.61588, 127.4572 , 128.07301, 128.97739, 131.64783]],\n",
- " dtype=float32),\n",
- " array([[149, 47, 224, 255, 254]]))"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
+ "source": [
+ "hits = emb_db.search(qe, 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3fa73421-6df0-4f7b-96da-23a394eb442e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21e0aab5-7f42-4fcc-9495-446968fc0c88",
+ "metadata": {},
+ "outputs": [],
"source": [
- "s, i = emb_db.search(qe, 5)\n",
- "s,i"
+ "emb_db.client.get_collection(collection_name=\"knowledge-base\")"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "1c90dd20-c640-48b5-88c0-4ba93b60c5e6",
"metadata": {},
"outputs": [],
@@ -160,28 +192,17 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "ed69d8bf-93f1-4353-a4c2-c4aacbe25420",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"docs_db.add_document(chunks)"
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "40ebc825-2e2c-4110-93ff-ae6ec3dc1322",
"metadata": {},
"outputs": [],
@@ -192,9 +213,30 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "0a4c565e-aace-4ea5-9093-9266b466b06c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from qdrant_client import QdrantClient"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"id": "4352b32d-c2e7-4fbf-aa05-fc46baf7c9f8",
"metadata": {},
"outputs": [],
+ "source": [
+ "\n",
+ "qdrant = QdrantClient(\"http://localhost:6333\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8382795-9610-4b24-80b7-31397b2faf90",
+ "metadata": {},
+ "outputs": [],
"source": []
}
],