summaryrefslogtreecommitdiff
path: root/notebooks
diff options
context:
space:
mode:
authorGustaf Rydholm <gustaf.rydholm@gmail.com>2024-05-29 00:53:39 +0200
committerGustaf Rydholm <gustaf.rydholm@gmail.com>2024-05-29 00:53:39 +0200
commit716e3fe58adee5b8a6bfa91de4b3ba6cf204d172 (patch)
tree778da9011d21051006fc206ce0978f0fc114b77b /notebooks
parent2d91c118d71a8dd7fbd7f9cf21f86e92da33827e (diff)
Wip memory
Diffstat (limited to 'notebooks')
-rw-r--r--notebooks/testing.ipynb203
1 files changed, 194 insertions, 9 deletions
diff --git a/notebooks/testing.ipynb b/notebooks/testing.ipynb
index d255438..26ff2e4 100644
--- a/notebooks/testing.ipynb
+++ b/notebooks/testing.ipynb
@@ -2,12 +2,13 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 1,
"id": "c1f56ae3-a056-4b31-bcab-27c2c97c00f1",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
+ "from dotenv import load_dotenv\n",
"\n",
"from importlib.util import find_spec\n",
"if find_spec(\"rag\") is None:\n",
@@ -17,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 2,
"id": "240df289-d9d6-424b-a5b9-e09dcfefd57e",
"metadata": {},
"outputs": [],
@@ -27,7 +28,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 7,
"id": "01305de3-0d6d-46e7-a0f0-2e2a2f72d563",
"metadata": {},
"outputs": [
@@ -35,18 +36,41 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "\u001b[32m2024-04-13 22:30:45.267\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.document\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m28\u001b[0m - \u001b[34m\u001b[1mCreating documents table if it does not exist...\u001b[0m\n",
- "\u001b[32m2024-04-13 22:30:45.278\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m43\u001b[0m - \u001b[34m\u001b[1mCollection knowledge-base already exists...\u001b[0m\n"
+ "\u001b[32m2024-04-20 20:49:04.904\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.document\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m28\u001b[0m - \u001b[34m\u001b[1mCreating documents table if it does not exist...\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:49:04.922\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m43\u001b[0m - \u001b[34m\u001b[1mCollection knowledge-base already exists!\u001b[0m\n"
]
}
],
"source": [
- "client = Retriever().vec_db"
+ "load_dotenv()\n",
+ "ret = Retriever()\n",
+ "vecdb = ret.vec_db"
]
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 5,
+ "id": "c83210e2-a5a3-46eb-bd77-aedc0d223190",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CountResult(count=17918)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "client.client.count(\"knowledge-base\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
"id": "4fcb1862-19a3-482b-b397-ce7be074c187",
"metadata": {},
"outputs": [
@@ -56,7 +80,7 @@
"True"
]
},
- "execution_count": 32,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -67,9 +91,170 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"id": "c86d538c-d613-4e4d-8dd2-3abcea2cfeef",
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=17918, indexed_vectors_count=0, points_count=17918, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None), payload_schema={})"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "client.client.get_collection(\"knowledge-base\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c346ef16-4884-4c3a-b03c-9f789ca00212",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2024-04-20 20:49:52.237\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is a convex function?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:49:52.239\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is a convex function?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:49:53.191\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:49:53.198\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n",
+ " Document(title='LinuxNotesForProfessionals.pdf', text='GoalKicker.com – Linux ® Notes for Professionals 44sudo systemctl disable sshd.service\\nDebian\\nsudo /etc/init.d/ssh stop\\nsudo systemctl disable sshd.service\\nArch Linux\\nsudo killall sshd\\nsudo systemctl disable sshd.service'),\n",
+ " Document(title='Kubernetes_Deployment_Antipatterns_v1.1.pdf', text='Anti-pattern 12\\nNot using the Helm package manager\\n37'),\n",
+ " Document(title='Docker_anti_patterns_vertical_3.pdf', text='Docker Anti-PatternsCONTINUOUS DEPLOYMENT / DELIVERY'),\n",
+ " Document(title='Haskell Design Patterns.pdf', text='B\\nbind\\tchain\\nand\\tmonad\\t/\\t\\nMonads\\tand\\tthe\\tbind\\tchain')]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ret.retrieve(\"what is a convex function?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "36d174df-aa21-4708-ad52-43bb8cfa80e5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2024-04-20 20:50:18.285\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is a hidden markov model?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:18.286\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is a hidden markov model?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:18.357\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:18.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[Document(title='thinkocaml.pdf', text='44 Chapter 5. Recursive Functions'),\n",
+ " Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n",
+ " Document(title='thinkdsp.pdf', text='Think DSP\\nDigital Signal Processing in Python\\nVersion 1.1.4\\nAllen B. Downey\\nGreen Tea Press\\nNeedham, Massachusetts'),\n",
+ " Document(title='PFP-0.1.pdf', text='ii'),\n",
+ " Document(title='Learning Bayesian Networks(Neapolitan, Richard).pdf', text='ii')]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ret.retrieve(\"what is a hidden markov model?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "091c5da0-3a9f-4100-8b2d-ee93a8cf3234",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2024-04-20 20:50:42.102\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is the weather today?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:42.104\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is the weather today?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:42.175\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:50:42.181\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n",
+ " Document(title='category-theory-for-programmers.pdf', text='Category Theory\\nfor Programmers\\nByBartosz Milewski\\ncompiled and edited by\\nIgal Tabachnik'),\n",
+ " Document(title='CNotesForProfessionals.pdf', text='Section 22.10: Pointer to Pointer 141 .............................................................................................................................. \\nSection 22.11: void* pointers as arguments and return values to standard functions 141 ....................................... \\nSection 22.12: Same Asterisk, Di\\ue023erent Meanings 142 ................................................................................................. \\nChapter 23: Sequence points 144 .............................................................................................................................. \\nSection 23.1: Unsequenced expressions 144 .................................................................................................................. \\nSection 23.2: Sequenced expressions 144 ..................................................................................................................... \\nSection 23.3: Indeterminately sequenced expressions 145 ......................................................................................... \\nChapter 24: Function Pointers 146 ........................................................................................................................... \\nSection 24.1: Introduction 146 .......................................................................................................................................... \\nSection 24.2: Returning Function Pointers from a Function 146 ................................................................................. \\nSection 24.3: Best Practices 147 ..................................................................................................................................... \\nSection 24.4: Assigning a Function Pointer 149 ............................................................................................................. \\nSection 24.5: Mnemonic for writing function pointers 149 ........................................................................................... \\nSection 24.6: Basics 150 ................................................................................................................................................... \\nChapter 25: Function Parameters 152 .................................................................................................................... \\nSection 25.1: Parameters are passed by value 152 ...................................................................................................... \\nSection 25.2: Passing in Arrays to Functions 152 .......................................................................................................... \\nSection 25.3: Order of function parameter execution 153 ........................................................................................... \\nSection 25.4: Using pointer parameters to return multiple values 153 ...................................................................... \\nSection 25.5: Example of function returning struct containing values with error codes 154 ................................... \\nChapter 26: Pass 2D-arrays to functions 156 ..................................................................................................... \\nSection 26.1: Pass a 2D-array to a function 156 ........................................................................................................... \\nSection 26.2: Using flat arrays as 2D arrays 162 .......................................................................................................... \\nChapter 27: Error handling 163 .................................................................................................................................. \\nSection 27.1: errno 163 .....................................................................................................................................................'),\n",
+ " Document(title='Beginning Haskell_ A Project-Based Approach.pdf', text='do Notation �������������������������������������������������������������������������������������������������������������������������������������������������������� 150\\nMonad Laws ������������������������������������������������������������������������������������������������������������������������������������������������������ 152'),\n",
+ " Document(title='Learning Bayesian Networks(Neapolitan, Richard).pdf', text='viii CONTENTS')]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ret.retrieve(\"what is the weather today?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "8830235e-1cdc-46b1-9a0f-96d7df6fc183",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2024-04-20 20:51:23.336\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.retriever\u001b[0m:\u001b[36mretrieve\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mFinding documents matching query: what is ocaml?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:51:23.337\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m41\u001b[0m - \u001b[34m\u001b[1mEncoding query: what is ocaml?\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:51:23.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m58\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n",
+ "\u001b[32m2024-04-20 20:51:23.416\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.retriever.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m65\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[Document(title='Haskell Design Patterns.pdf', text='N\\nnon-tail\\trecursion\\t/\\t\\nNon-tail\\trecursion'),\n",
+ " Document(title='category-theory-for-programmers.pdf', text='Category Theory\\nfor Programmers\\nByBartosz Milewski\\ncompiled and edited by\\nIgal Tabachnik'),\n",
+ " Document(title='Docker_anti_patterns_vertical_3.pdf', text='Docker Anti-PatternsCONTINUOUS DEPLOYMENT / DELIVERY'),\n",
+ " Document(title='thinkocaml.pdf', text='90 Chapter 12. Hashtables'),\n",
+ " Document(title='Beginning Haskell_ A Project-Based Approach.pdf', text='do Notation �������������������������������������������������������������������������������������������������������������������������������������������������������� 150\\nMonad Laws ������������������������������������������������������������������������������������������������������������������������������������������������������ 152')]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ret.retrieve(\"what is ocaml?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6d0c2d0-25a3-446b-a103-b8b56c82296c",
+ "metadata": {},
"outputs": [],
"source": []
}