summary | refs | log | tree | commit | diff
path: root/rag/parser
diff options
context:
space:
mode:
author: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-05 01:57:31 +0200
committer: Gustaf Rydholm <gustaf.rydholm@gmail.com> 2024-04-05 01:57:31 +0200
commit: 07741e09f4569d27aca3ffa111178bc324af1eab (patch)
tree: 17d2dcb35c3b7cc4e4e8a2602e3a4f50e671f634 /rag/parser
parent: 901212a2fd9658e9b51a4e977c68a47ef7b840b7 (diff)
Add .env for parser
Diffstat (limited to 'rag/parser')
-rw-r--r-- rag/parser/pdf.py 3
1 file changed, 2 insertions, 1 deletion
diff --git a/rag/parser/pdf.py b/rag/parser/pdf.py
index a5e4665..1680a47 100644
--- a/rag/parser/pdf.py
+++ b/rag/parser/pdf.py
@@ -8,7 +8,8 @@ from langchain_community.document_loaders import PyPDFLoader
def parser(filepath: Path):
content = PyPDFLoader(filepath).load()
splitter = RecursiveCharacterTextSplitter(
- chunk_size=os.environ["CHUNK_SIZE"], chunk_overlap=os.environ["CHUNK_OVERLAP"]
+ chunk_size=int(os.environ["CHUNK_SIZE"]),
+ chunk_overlap=int(os.environ["CHUNK_OVERLAP"]),
)
chunks = splitter.split_documents(content)
return chunks