{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "c1f56ae3-a056-4b31-bcab-27c2c97c00f1", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "from importlib.util import find_spec\n", "if find_spec(\"rag\") is None:\n", " import sys\n", " sys.path.append('..')\n", "from rag.rag import RAG" ] }, { "cell_type": "code", "execution_count": 2, "id": "6b5cb12e-df7e-4532-b78b-216e11ed6161", "metadata": {}, "outputs": [], "source": [ "path = Path(\"/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "b8382795-9610-4b24-80b7-31397b2faf90", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2024-04-06 01:37:11.913\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.db.document\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreating documents table if it does not exist...\u001b[0m\n", "\u001b[32m2024-04-06 01:37:11.926\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.db.vector\u001b[0m:\u001b[36m__configure\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCollection knowledge-base already exists...\u001b[0m\n" ] } ], "source": [ "rag = RAG()" ] }, { "cell_type": "code", "execution_count": 4, "id": "ac57e50d-1fc3-4fc9-90e5-5bdb97bd2f5e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2024-04-06 01:37:17.243\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.db.document\u001b[0m:\u001b[36madd_document\u001b[0m:\u001b[36m37\u001b[0m - \u001b[34m\u001b[1mInserting document hash into documents db...\u001b[0m\n", "\u001b[32m2024-04-06 01:37:17.244\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.db.document\u001b[0m:\u001b[36m__hash\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mGenerating sha256 hash for pdf document\u001b[0m\n", "\u001b[32m2024-04-06 01:37:17.247\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.rag\u001b[0m:\u001b[36madd_pdf\u001b[0m:\u001b[36m31\u001b[0m - \u001b[34m\u001b[1mDocument already exists!\u001b[0m\n" ] } ], "source": [ "rag.add_pdf(path)" ] }, { "cell_type": "code", "execution_count": 5, "id": "1c6b48d2-eb04-4a7c-8224-78aabfc7c887", "metadata": {}, "outputs": [], "source": [ "query = \"What is a factor model?\"" ] }, { "cell_type": "code", "execution_count": 6, "id": "a95c8250-00b2-4cbc-a9c6-a76d14ef2da5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2024-04-06 01:37:17.265\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.llm.encoder\u001b[0m:\u001b[36mencode_query\u001b[0m:\u001b[36m33\u001b[0m - \u001b[34m\u001b[1mEncoding query: What is a factor model?\u001b[0m\n", "\u001b[32m2024-04-06 01:37:17.858\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.db.vector\u001b[0m:\u001b[36msearch\u001b[0m:\u001b[36m51\u001b[0m - \u001b[34m\u001b[1mSearching for vectors...\u001b[0m\n", "\u001b[32m2024-04-06 01:37:17.864\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.rag\u001b[0m:\u001b[36m__context\u001b[0m:\u001b[36m35\u001b[0m - \u001b[34m\u001b[1mGot 5 hits in the vector db with limit=5\u001b[0m\n", "\u001b[32m2024-04-06 01:37:17.865\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mrag.llm.generator\u001b[0m:\u001b[36mgenerate\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mGenerating answer...\u001b[0m\n" ] }, { "data": { "text/plain": [ "'A factor model is a type of model used to explain the returns or movements of financial assets by decomposing them into two parts: systematic risk, which is driven by a small number of factors affecting many securities; and idiosyncratic risk, which is specific to individual stocks. The general factor model is rt = φ0 + h(ft) + wt, where rt denotes the return of an asset at time t, φ0 represents a constant vector, ft is a vector of factors responsible for most of the randomness in the market, and h is a function that summarizes how these low-dimensional factors affect higher-dimensional markets. The residual wt accounts for any remaining uncorrelated perturbations with only a marginal effect on returns.'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rag.rag(query, \"quant researcher\", limit=5)" ] }, { "cell_type": "code", "execution_count": null, "id": "11ac0d46-8589-4700-abf7-7959afbf611c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }