import os from langchain_community.document_loaders import PyPDFLoader from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain_community.vectorstores import Chroma from langchain_community.embeddings import HuggingFaceEmbeddings def main(): print("📖 Loading homeopathy_book.pdf...") loader = PyPDFLoader("homeopathy_book.pdf") documents = loader.load() print("✂️ Splitting text into chunks...") text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200 ) chunks = text_splitter.split_documents(documents) print(f"🧠 Creating memory database with {len(chunks)} chunks...") embeddings = HuggingFaceEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2" ) vector_store = Chroma.from_documents( documents=chunks, embedding=embeddings, persist_directory="./vector_db" ) print("✅ Ingestion complete! Memory saved to 'vector_db' folder.") if __name__ == "__main__": main()