{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-overview",
   "metadata": {},
   "source": [
    "# RAG pipeline walkthrough: parse, embed, search\n",
    "\n",
    "Scratch walkthrough of the `rag` package on a single PDF:\n",
    "\n",
    "1. run `rag.parser.pdf.parser` on the PDF to obtain `chunks`,\n",
    "2. encode the text of one chunk with `rag.llms.encoder.Encoder`,\n",
    "3. add that embedding to a `rag.db.vector.VectorStore` and search it with one encoded query,\n",
    "4. compute a SHA-256 hex digest over the concatenated chunk texts.\n",
    "\n",
    "The notebook is meant to survive **Restart Kernel -> Run All**. Set the `RAG_DEMO_PDF` environment variable to point it at a PDF on your own machine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1f56ae3-a056-4b31-bcab-27c2c97c00f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import hashlib\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from rag.db.vector import VectorStore\n",
    "from rag.llms.encoder import Encoder\n",
    "from rag.parser.pdf import parser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b5cb12e-df7e-4532-b78b-216e11ed6161",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Override RAG_DEMO_PDF to run on another machine; the default is the file this\n",
    "# notebook was originally written against.\n",
    "PDF_PATH = Path(\n",
    "    os.environ.get(\n",
    "        \"RAG_DEMO_PDF\",\n",
    "        \"/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# NOTE(review): the chunk that is inspected (32) is not the chunk that is\n",
    "# embedded (33) -- both values are kept from the original; confirm this is intentional.\n",
    "INSPECT_CHUNK_IDX = 32  # chunk displayed for a manual look\n",
    "EMBED_CHUNK_IDX = 33  # chunk whose text is encoded and stored\n",
    "# Second argument to VectorStore.search (presumably the number of results -- confirm).\n",
    "TOP_K = 1\n",
    "\n",
    "# Fail early with a clear message instead of an opaque error from inside the parser.\n",
    "if not PDF_PATH.is_file():\n",
    "    raise FileNotFoundError(f\"PDF not found: {PDF_PATH} (set RAG_DEMO_PDF to a local copy)\")\n",
    "\n",
    "PDF_PATH"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-parse",
   "metadata": {},
   "source": [
    "## Parse the PDF\n",
    "\n",
    "`parser` returns an indexable collection; each element exposes its text as `.page_content`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1a57be8-21a2-48d3-b99f-d1bbf7b8780a",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks = parser(PDF_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b848627e-1e30-492a-ada6-a899112cd515",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunks[INSPECT_CHUNK_IDX]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f01d39c-3310-4d6d-8b7b-f253975b5e37",
   "metadata": {},
   "outputs": [],
   "source": [
    "type(chunks[INSPECT_CHUNK_IDX])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-embed",
   "metadata": {},
   "source": [
    "## Embed one chunk\n",
    "\n",
    "`Encoder.encode` is given a one-element list, so `chunk_embeddings[0]` is the embedding of the selected chunk."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14408e20-3dec-40b4-9dda-3397beb0c453",
   "metadata": {},
   "outputs": [],
   "source": [
    "encoder = Encoder()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e94993c5-6b19-4bac-b7e8-c26a5200a626",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunk_embeddings = encoder.encode([chunks[EMBED_CHUNK_IDX].page_content])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b845bb31-0909-42cb-9957-9a8b3bb0b5c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunk_embeddings[0].shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-search",
   "metadata": {},
   "source": [
    "## Store the embedding and search\n",
    "\n",
    "The chunk is encoded with `encoder.encode` and the question with `encoder.query`. Both raw vectors are displayed after the search so they can be compared by eye."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa279b1a-465e-4820-ab56-b25fc513c0a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "vec_db = VectorStore()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1dc655de-2359-42ce-b705-76ec06c5f72f",
   "metadata": {},
   "outputs": [],
   "source": [
    "vec_db.add(chunk_embeddings[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "117d3416-e79f-436f-a33e-ffb45b972b72",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_text = \"the variance of the portfolio\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a6ef474-678c-4525-8dcb-ece67aa9c7ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_embedding = encoder.query(query_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c8a16ba-6025-4a6e-95c2-bbba7a9a5de5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the two return values are presumably (scores, indices) of the\n",
    "# best matches -- confirm against VectorStore.search.\n",
    "scores, indices = vec_db.search(query_embedding, TOP_K)\n",
    "scores, indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d5f4601-82b0-47b7-8c79-938435591b59",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunk_embeddings[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7a65b4d-7269-44bf-9b29-9ef0ee2b2052",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_embedding"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-hash",
   "metadata": {},
   "source": [
    "## Hash the document text\n",
    "\n",
    "All chunk texts are joined with no separator, UTF-8 encoded and hashed with SHA-256. The last cell only checks the type of the hex digest."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66f7fd80-a970-4ad9-b5c9-02ab2e39855b",
   "metadata": {},
   "outputs": [],
   "source": [
    "document_text = \"\".join(chunk.page_content for chunk in chunks)\n",
    "document_bytes = document_text.encode(\"utf-8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d385588-cdd5-4f8c-beae-31685a9f4c63",
   "metadata": {},
   "outputs": [],
   "source": [
    "document_digest = hashlib.sha256(document_bytes).hexdigest()\n",
    "type(document_digest)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}