summary | refs | log | tree | commit | diff
path: root/rag/rag.py
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-06 00:19:24 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-06 00:19:24 +0200
commit: 59c77c93c39755526e3d7649660780584b1c090d (patch)
tree: b2c0e0a358883c1d82ea1c53bdcaa64315d20f5d /rag/rag.py
parent: 4b5a939a36c64175b7497d05b3417ee46371a917 (diff)
Wip rag
Diffstat (limited to 'rag/rag.py')
-rw-r--r-- rag/rag.py 20
1 files changed, 20 insertions, 0 deletions
diff --git a/rag/rag.py b/rag/rag.py
new file mode 100644
index 0000000..5b5f5ab
--- /dev/null
+++ b/rag/rag.py
@@ -0,0 +1,20 @@
+from pathlib import Path
+from typing import List, Optional
+
+from langchain_core.documents.base import Document
+from llm.encoder import Encoder
+from llm.generator import Generator
+from parser import pdf
+from db.documents import Documents
+from db.vectors import Vectors
+
+
+class RAG:
+ def __init__(self) -> None:
+ self.generator = Generator()
+ self.encoder = Encoder()
+ self.docs = Documents()
+ self.vectors = Vectors()
+
+ def add_pdf(self, filepath: Path) -> Optional[List[Document]]:
+ chunks = pdf.parser(filepath)