Homeopathy-LLM / ingest_book.py
ItsCxdy's picture
Upload 12 files
e123a74 verified
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
def main():
print("πŸ“– Loading homeopathy_book.pdf...")
loader = PyPDFLoader("homeopathy_book.pdf")
documents = loader.load()
print("βœ‚οΈ Splitting text into chunks...")
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200
)
chunks = text_splitter.split_documents(documents)
print(f"🧠 Creating memory database with {len(chunks)} chunks...")
embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vector_store = Chroma.from_documents(
documents=chunks,
embedding=embeddings,
persist_directory="./vector_db"
)
print("βœ… Ingestion complete! Memory saved to 'vector_db' folder.")
if __name__ == "__main__":
main()