Spaces:
Runtime error
Runtime error
| import os | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
def main(
    pdf_path: str = "homeopathy_book.pdf",
    persist_directory: str = "./vector_db",
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
) -> None:
    """Ingest a PDF into a persistent Chroma vector store.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks
    with ``RecursiveCharacterTextSplitter``, embedded with
    ``HuggingFaceEmbeddings`` and written to ``persist_directory`` via
    ``Chroma.from_documents``. Calling ``main()`` with no arguments uses the
    same file, directory, chunk sizes and model as the original script.

    Args:
        pdf_path: Path to a local PDF file to ingest.
        persist_directory: Directory passed to Chroma as its
            ``persist_directory``.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.
        model_name: Embedding model name given to ``HuggingFaceEmbeddings``.

    Raises:
        FileNotFoundError: If ``pdf_path`` is not an existing file.
        ValueError: If splitting the loaded document yields no chunks.
    """
    # Fail fast with an actionable message before any heavy work (PDF
    # parsing, loading the embedding model) is started.
    if not os.path.isfile(pdf_path):
        raise FileNotFoundError(
            f"PDF file not found: {pdf_path!r} "
            f"(current working directory: {os.getcwd()!r})"
        )

    # NOTE(review): the leading symbols in the status messages below look
    # like mis-decoded UTF-8 emoji; they are kept exactly as found. Restore
    # them from the original file if it is available.
    print(f"π Loading {pdf_path}...")
    documents = PyPDFLoader(pdf_path).load()

    print("βοΈ Splitting text into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)

    # Without chunks there is nothing to embed (presumably the case for an
    # image-only PDF with no extractable text), so stop here rather than
    # build an empty store and report success.
    if not chunks:
        raise ValueError(
            f"No text chunks were produced from {pdf_path!r}; nothing to ingest."
        )

    print(f"π§ Creating memory database with {len(chunks)} chunks...")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
    )

    # normpath turns the default "./vector_db" into "vector_db", so the
    # default run prints the same message as before.
    saved_to = os.path.normpath(persist_directory)
    print(f"β Ingestion complete! Memory saved to '{saved_to}' folder.")
# Script entry point: run the ingestion only when this file is executed
# directly, so importing the module does not trigger it.
if __name__ == "__main__":
    main()