diff options
Diffstat (limited to 'notebooks/testing.ipynb')
-rw-r--r-- | notebooks/testing.ipynb | 138 |
1 files changed, 90 insertions, 48 deletions
diff --git a/notebooks/testing.ipynb b/notebooks/testing.ipynb index de5e76e..4d64cf2 100644 --- a/notebooks/testing.ipynb +++ b/notebooks/testing.ipynb @@ -66,57 +66,83 @@ { "cell_type": "code", "execution_count": 6, - "id": "b845bb31-0909-42cb-9957-9a8b3bb0b5c4", + "id": "aa279b1a-465e-4820-ab56-b25fc513c0a1", + "metadata": {}, + "outputs": [], + "source": [ + "emb_db = Embeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1dc655de-2359-42ce-b705-76ec06c5f72f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "485\n" + ] + } + ], + "source": [ + "emb_db.add(embs)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "57173d80-9519-479e-9cd9-ba7ccdae7d6b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1024,)" + "CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=485, indexed_vectors_count=0, points_count=485, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), payload_schema={})" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "embs[0].shape" + "emb_db.client.get_collection(collection_name=\"knowledge-base\")" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "aa279b1a-465e-4820-ab56-b25fc513c0a1", + "execution_count": null, + "id": "56c9df8a-cbf6-4051-8f4b-cb1eb89a536e", "metadata": {}, "outputs": [], "source": [ - "emb_db = Embeddings()" + "embs[125]" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "1dc655de-2359-42ce-b705-76ec06c5f72f", + "execution_count": null, + "id": "117d3416-e79f-436f-a33e-ffb45b972b72", "metadata": {}, "outputs": [], "source": [ - "emb_db.add(embs)" + "q = \"non-parametric least-square\\nestimation and the parametric MLE under Gaussian assumption?\"" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "117d3416-e79f-436f-a33e-ffb45b972b72", + "execution_count": null, + "id": "b31f0362-6def-4e50-a31c-8b7e2995c62b", "metadata": {}, "outputs": [], - "source": [ - "q = \"the variance of the portfolio\"" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "3a6ef474-678c-4525-8dcb-ece67aa9c7ea", "metadata": {}, "outputs": [], @@ -126,31 +152,37 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "4c8a16ba-6025-4a6e-95c2-bbba7a9a5de5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[122.61588, 127.4572 , 128.07301, 128.97739, 131.64783]],\n", - " dtype=float32),\n", - " array([[149, 47, 224, 255, 254]]))" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "hits = emb_db.search(qe, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fa73421-6df0-4f7b-96da-23a394eb442e", + "metadata": {}, + "outputs": [], + "source": [ + "hits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21e0aab5-7f42-4fcc-9495-446968fc0c88", + "metadata": {}, + "outputs": [], "source": [ - "s, i = emb_db.search(qe, 5)\n", - "s,i" + "emb_db.client.get_collection(collection_name=\"knowledge-base\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "1c90dd20-c640-48b5-88c0-4ba93b60c5e6", "metadata": {}, "outputs": [], @@ -160,28 +192,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "ed69d8bf-93f1-4353-a4c2-c4aacbe25420", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "docs_db.add_document(chunks)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "40ebc825-2e2c-4110-93ff-ae6ec3dc1322", "metadata": {}, "outputs": [], @@ -192,9 +213,30 @@ { "cell_type": "code", "execution_count": null, + "id": "0a4c565e-aace-4ea5-9093-9266b466b06c", + "metadata": {}, + "outputs": [], + "source": [ + "from qdrant_client import QdrantClient" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "4352b32d-c2e7-4fbf-aa05-fc46baf7c9f8", "metadata": {}, "outputs": [], + "source": [ + "\n", + "qdrant = QdrantClient(\"http://localhost:6333\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8382795-9610-4b24-80b7-31397b2faf90", + "metadata": {}, + "outputs": [], "source": [] } ], |