From 716e3fe58adee5b8a6bfa91de4b3ba6cf204d172 Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Wed, 29 May 2024 00:53:39 +0200 Subject: Wip memory --- notebooks/testing.ipynb | 203 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 194 insertions(+), 9 deletions(-) (limited to 'notebooks') diff --git a/notebooks/testing.ipynb b/notebooks/testing.ipynb index d255438..26ff2e4 100644 --- a/notebooks/testing.ipynb +++ b/notebooks/testing.ipynb @@ -2,12 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 27, + "execution_count": 1, "id": "c1f56ae3-a056-4b31-bcab-27c2c97c00f1", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", + "from dotenv import load_dotenv\n", "\n", "from importlib.util import find_spec\n", "if find_spec(\"rag\") is None:\n", @@ -17,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 2, "id": "240df289-d9d6-424b-a5b9-e09dcfefd57e", "metadata": {}, "outputs": [], @@ -27,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 7, "id": "01305de3-0d6d-46e7-a0f0-2e2a2f72d563", "metadata": {}, "outputs": [ @@ -35,18 +36,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-04-13 22:30:45.267\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.document\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m28\u001b[0m - \u001b[34m\u001b[1mCreating documents table if it does not exist...\u001b[0m\n", - "\u001b[32m2024-04-13 22:30:45.278\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m43\u001b[0m - \u001b[34m\u001b[1mCollection knowledge-base already exists...\u001b[0m\n" + "\u001b[32m2024-04-20 20:49:04.904\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.document\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m28\u001b[0m - \u001b[34m\u001b[1mCreating documents table if it does not exist...\u001b[0m\n", + "\u001b[32m2024-04-20 20:49:04.922\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m43\u001b[0m - \u001b[34m\u001b[1mCollection knowledge-base already exists!\u001b[0m\n" ] } ], "source": [ - "client = Retriever().vec_db" + "load_dotenv()\n", + "ret = Retriever()\n", + "vecdb = ret.vec_db" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 5, + "id": "c83210e2-a5a3-46eb-bd77-aedc0d223190", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CountResult(count=17918)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.client.count(\"knowledge-base\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "4fcb1862-19a3-482b-b397-ce7be074c187", "metadata": {}, "outputs": [ @@ -56,7 +80,7 @@ "True" ] }, - "execution_count": 32, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -67,9 +91,170 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c86d538c-d613-4e4d-8dd2-3abcea2cfeef", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CollectionInfo(status=, optimizer_status=, vectors_count=17918, indexed_vectors_count=0, points_count=17918, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=, hnsw_config=None, quantization_config=None, on_disk=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), payload_schema={})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.client.get_collection(\"knowledge-base\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c346ef16-4884-4c3a-b03c-9f789ca00212", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-04-20 20:49:52.237\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is a convex function?\u001b[0m\n", + "\u001b[32m2024-04-20 20:49:52.239\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is a convex function?\u001b[0m\n", + "\u001b[32m2024-04-20 20:49:53.191\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n", + "\u001b[32m2024-04-20 20:49:53.198\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n", + " Document(title='LinuxNotesForProfessionals.pdf', text='GoalKicker.com – Linux ® Notes for Professionals 44sudo systemctl disable sshd.service\\nDebian\\nsudo /etc/init.d/ssh stop\\nsudo systemctl disable sshd.service\\nArch Linux\\nsudo killall sshd\\nsudo systemctl disable sshd.service'),\n", + " Document(title='Kubernetes_Deployment_Antipatterns_v1.1.pdf', text='Anti-pattern 12\\nNot using the Helm package manager\\n37'),\n", + " Document(title='Docker_anti_patterns_vertical_3.pdf', text='Docker Anti-PatternsCONTINUOUS DEPLOYMENT / DELIVERY'),\n", + " Document(title='Haskell Design Patterns.pdf', text='B\\nbind\\tchain\\nand\\tmonad\\t/\\t\\nMonads\\tand\\tthe\\tbind\\tchain')]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret.retrieve(\"what is a convex function?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "36d174df-aa21-4708-ad52-43bb8cfa80e5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-04-20 20:50:18.285\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is a hidden markov model?\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:18.286\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is a hidden markov model?\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:18.357\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:18.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(title='thinkocaml.pdf', text='44 Chapter 5. Recursive Functions'),\n", + " Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n", + " Document(title='thinkdsp.pdf', text='Think DSP\\nDigital Signal Processing in Python\\nVersion 1.1.4\\nAllen B. Downey\\nGreen Tea Press\\nNeedham, Massachusetts'),\n", + " Document(title='PFP-0.1.pdf', text='ii'),\n", + " Document(title='Learning Bayesian Networks(Neapolitan, Richard).pdf', text='ii')]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret.retrieve(\"what is a hidden markov model?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "091c5da0-3a9f-4100-8b2d-ee93a8cf3234", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-04-20 20:50:42.102\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is the weather today?\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:42.104\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is the weather today?\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:42.175\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n", + "\u001b[32m2024-04-20 20:50:42.181\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n", + " Document(title='category-theory-for-programmers.pdf', text='Category Theory\\nfor Programmers\\nByBartosz Milewski\\ncompiled and edited by\\nIgal Tabachnik'),\n", + " Document(title='CNotesForProfessionals.pdf', text='Section 22.10: Pointer to Pointer 141 .............................................................................................................................. \\nSection 22.11: void* pointers as arguments and return values to standard functions 141 ....................................... \\nSection 22.12: Same Asterisk, Di\\ue023erent Meanings 142 ................................................................................................. \\nChapter 23: Sequence points 144 .............................................................................................................................. \\nSection 23.1: Unsequenced expressions 144 .................................................................................................................. \\nSection 23.2: Sequenced expressions 144 ..................................................................................................................... \\nSection 23.3: Indeterminately sequenced expressions 145 ......................................................................................... \\nChapter 24: Function Pointers 146 ........................................................................................................................... \\nSection 24.1: Introduction 146 .......................................................................................................................................... \\nSection 24.2: Returning Function Pointers from a Function 146 ................................................................................. \\nSection 24.3: Best Practices 147 ..................................................................................................................................... \\nSection 24.4: Assigning a Function Pointer 149 ............................................................................................................. \\nSection 24.5: Mnemonic for writing function pointers 149 ........................................................................................... \\nSection 24.6: Basics 150 ................................................................................................................................................... \\nChapter 25: Function Parameters 152 .................................................................................................................... \\nSection 25.1: Parameters are passed by value 152 ...................................................................................................... \\nSection 25.2: Passing in Arrays to Functions 152 .......................................................................................................... \\nSection 25.3: Order of function parameter execution 153 ........................................................................................... \\nSection 25.4: Using pointer parameters to return multiple values 153 ...................................................................... \\nSection 25.5: Example of function returning struct containing values with error codes 154 ................................... \\nChapter 26: Pass 2D-arrays to functions 156 ..................................................................................................... \\nSection 26.1: Pass a 2D-array to a function 156 ........................................................................................................... \\nSection 26.2: Using flat arrays as 2D arrays 162 .......................................................................................................... \\nChapter 27: Error handling 163 .................................................................................................................................. \\nSection 27.1: errno 163 .....................................................................................................................................................'),\n", + " Document(title='Beginning Haskell_ A Project-Based Approach.pdf', text='do Notation �������������������������������������������������������������������������������������������������������������������������������������������������������� 150\\nMonad Laws ������������������������������������������������������������������������������������������������������������������������������������������������������ 152'),\n", + " Document(title='Learning Bayesian Networks(Neapolitan, Richard).pdf', text='viii CONTENTS')]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret.retrieve(\"what is the weather today?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8830235e-1cdc-46b1-9a0f-96d7df6fc183", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-04-20 20:51:23.336\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is ocaml?\u001b[0m\n", + "\u001b[32m2024-04-20 20:51:23.337\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is ocaml?\u001b[0m\n", + "\u001b[32m2024-04-20 20:51:23.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n", + "\u001b[32m2024-04-20 20:51:23.416\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n", + " Document(title='category-theory-for-programmers.pdf', text='Category Theory\\nfor Programmers\\nByBartosz Milewski\\ncompiled and edited by\\nIgal Tabachnik'),\n", + " Document(title='Docker_anti_patterns_vertical_3.pdf', text='Docker Anti-PatternsCONTINUOUS DEPLOYMENT / DELIVERY'),\n", + " Document(title='thinkocaml.pdf', text='90 Chapter 12. Hashtables'),\n", + " Document(title='Beginning Haskell_ A Project-Based Approach.pdf', text='do Notation �������������������������������������������������������������������������������������������������������������������������������������������������������� 150\\nMonad Laws ������������������������������������������������������������������������������������������������������������������������������������������������������ 152')]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret.retrieve(\"what is ocaml?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6d0c2d0-25a3-446b-a103-b8b56c82296c", + "metadata": {}, "outputs": [], "source": [] } -- cgit v1.2.3-70-g09d2