
feat: Llama index milvus rag #11


Merged · 26 commits · Apr 28, 2025

Changes from 17 commits

Commits
d35bef4
inital llamaindex rag system via docling
aevo98765 Apr 7, 2025
c27591f
making the initialisation of RAG shared assets conditional based on t…
aevo98765 Apr 8, 2025
37c83ce
conditional rendering of the RAG tools based on the env vars
aevo98765 Apr 8, 2025
c492fb0
adding llama-index packages
aevo98765 Apr 8, 2025
2495d1e
adding an example env vars file
aevo98765 Apr 8, 2025
d82052b
removing not needed imports and code
aevo98765 Apr 8, 2025
3042fad
conditionally rendering the RAG tooling
aevo98765 Apr 8, 2025
4ada834
adding RAG related setup info to the README
aevo98765 Apr 8, 2025
f390bf0
moving the RAG related dependencies to within the conditional section
aevo98765 Apr 8, 2025
7156fb0
adding doc strings to tools
aevo98765 Apr 8, 2025
38aef46
updating the uv.lock file
aevo98765 Apr 8, 2025
0870215
moving the embedding model specification to the .env file
aevo98765 Apr 14, 2025
c7c847a
changing from Chroma to milvus in memory. ALso changing from markdown…
aevo98765 Apr 14, 2025
89bdb4e
linting
aevo98765 Apr 14, 2025
3b284a9
updating the README
aevo98765 Apr 15, 2025
44b7ab4
adding back uv.lock
aevo98765 Apr 15, 2025
b51b807
updating the README
aevo98765 Apr 15, 2025
6f2f9ae
Merge branch 'main' into llama-index-chromadb-rag
aevo98765 Apr 25, 2025
8ad7955
reorganising the RAG milvus instructions in the README.md to be inclu…
aevo98765 Apr 25, 2025
d76db84
Merge branch 'llama-index-chromadb-rag' of github.com:aevo98765/docli…
aevo98765 Apr 25, 2025
b25bbb4
moving the RAG tools to a new file
aevo98765 Apr 25, 2025
518765b
pre commit enforced changes
aevo98765 Apr 25, 2025
0122efd
chore: simplify the type hints of search_documents tool
ceberam Apr 28, 2025
5449451
chore: add Milvus database files to gitignore
ceberam Apr 28, 2025
dbde847
fix: condition the application tools to env variables
ceberam Apr 28, 2025
7624a19
adding comment about potential issue with lock file
aevo98765 Apr 28, 2025
3 changes: 3 additions & 0 deletions .env.example
@@ -0,0 +1,3 @@
RAG_ENABLED=true
OLLAMA_MODEL=granite3.2:latest
EMBEDDING_MODEL=BAAI/bge-base-en-v1.5
45 changes: 45 additions & 0 deletions README.md
@@ -28,9 +28,40 @@ Docling MCP is a service that provides tools for document conversion, processing
- Support for local files and URLs as document sources
- Memory management for handling large documents
- Logging system for debugging and monitoring
- Milvus upload and retrieval

## Getting started

### Part 1 - RAG configuration (skip this if you do not want RAG)

Copy the `.env.example` file to `.env` in the root of the project.

```sh
cp .env.example .env
```

If you want to use the RAG Milvus functionality, edit the new `.env` file to set the following environment variables.

```text
RAG_ENABLED=true
OLLAMA_MODEL=granite3.2:latest
EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
```

Note:

Ollama can be downloaded from https://ollama.com/. Once you have Ollama, download the model you want to use and add its model string to the `.env` file.

For example, we use `granite3.2:latest` to perform the RAG search.

To download this model run:

```sh
ollama pull granite3.2:latest
```
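
Before starting the server, the configuration can be smoke-tested directly. The following is a minimal sketch (not part of this PR) that mirrors the constructors used in `docling_mcp/shared.py`; it assumes the project dependencies are installed and an Ollama server is running locally:

```python
import os

from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

load_dotenv()  # reads OLLAMA_MODEL and EMBEDDING_MODEL from .env

# The embedding model is downloaded from Hugging Face on first use.
embed_model = HuggingFaceEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
print("embedding dim:", len(embed_model.get_text_embedding("hello")))

# Requires a running Ollama server with the model already pulled.
llm = Ollama(model=os.getenv("OLLAMA_MODEL"), request_timeout=120.0)
print(llm.complete("Reply with one word: ready?"))
```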

### Part 2 - Environment setup

Install dependencies

@@ -71,6 +102,20 @@ During the writing process, you can check what has been written already by calling
The document should investigate the impact of tokenizers on the quality of LLMs.
```

## RAG documents

An example prompt for the RAG system:

```prompt
Process this file /Users/name/example/mock.pdf

Upload it to the vector store.

Then summarize xyz that is contained within the document.
```



## License

The Docling-MCP codebase is under MIT license. For individual model usage, please refer to the model licenses found in the original packages.
30 changes: 30 additions & 0 deletions docling_mcp/shared.py
@@ -1,5 +1,16 @@
import os

from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.core.indices.vector_store.base import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.vector_stores.milvus import MilvusVectorStore
from mcp.server.fastmcp import FastMCP

load_dotenv()

from docling_core.types.doc.document import (
DoclingDocument,
NodeItem,
@@ -13,3 +24,22 @@
# Define your shared cache here if it's used by multiple tools
local_document_cache: dict[str, DoclingDocument] = {}
local_stack_cache: dict[str, list[NodeItem]] = {}

# os.getenv() returns None for unset variables, so truthiness checks cover
# both missing and empty values.
if (
    os.getenv("RAG_ENABLED") == "true"
    and os.getenv("OLLAMA_MODEL")
    and os.getenv("EMBEDDING_MODEL")
):
embed_model = HuggingFaceEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
Settings.embed_model = embed_model
Settings.llm = Ollama(model=os.getenv("OLLAMA_MODEL"), request_timeout=120.0)

node_parser = DoclingNodeParser()

embed_dim = len(embed_model.get_text_embedding("hi"))

milvus_vector_store = MilvusVectorStore(
uri="./milvus_demo.db", dim=embed_dim, overwrite=True
)

local_index_cache: dict[str, VectorStoreIndex] = {}
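
For context on the `embed_dim` probe above: Milvus collections are created with a fixed vector dimension, so `shared.py` derives it from a throwaway embedding instead of hard-coding a per-model value. A standalone sketch of the same pattern (the model name is illustrative):

```python
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.milvus import MilvusVectorStore

# Illustrative model; any sentence-embedding model works here.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Probe once to get the vector dimension the Milvus collection requires.
embed_dim = len(embed_model.get_text_embedding("probe"))

# overwrite=True recreates the collection on startup, so the local
# milvus_demo.db store is rebuilt from scratch on every server run.
store = MilvusVectorStore(uri="./milvus_demo.db", dim=embed_dim, overwrite=True)
```
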
89 changes: 89 additions & 0 deletions docling_mcp/tools/generation.py
@@ -1,4 +1,5 @@
import hashlib
import os
from io import BytesIO

# from bs4 import BeautifulSoup # , NavigableString, PageElement, Tag
@@ -506,3 +507,91 @@ def add_table_in_html_format_to_docling_document(
)

return f"Added table to a document with key: {document_key}"


# Same env guard as docling_mcp/shared.py: truthiness handles variables
# that are unset (None) as well as empty.
if (
    os.getenv("RAG_ENABLED") == "true"
    and os.getenv("OLLAMA_MODEL")
    and os.getenv("EMBEDDING_MODEL")
):
import json

from llama_index.core import Document, StorageContext, VectorStoreIndex

from docling_mcp.shared import local_index_cache, milvus_vector_store, node_parser

@mcp.tool()
def export_docling_document_to_vector_db(document_key: str) -> str:
"""
Exports a document from the local document cache to a vector database for search capabilities.

This tool converts a Docling document that exists in the local cache into markdown format,
then loads it into a vector database index. This allows the document to be searched using
semantic search techniques.

Args:
document_key (str): The unique identifier for the document in the local cache.

Returns:
str: A confirmation message indicating the document was successfully indexed.

Raises:
ValueError: If the specified document_key does not exist in the local cache.

Example:
export_docling_document_to_vector_db("doc123")
"""
if document_key not in local_document_cache:
doc_keys = ", ".join(local_document_cache.keys())
raise ValueError(
f"document-key: {document_key} is not found. Existing document-keys are: {doc_keys}"
)

docling_document: DoclingDocument = local_document_cache[document_key]
document_dict: dict = docling_document.export_to_dict()
document_json: str = json.dumps(document_dict)

document = Document(
text=document_json,
metadata={"filename": docling_document.name},
)

index = VectorStoreIndex.from_documents(
documents=[document],
transformations=[node_parser],
storage_context=StorageContext.from_defaults(
vector_store=milvus_vector_store
),
)

# VectorStoreIndex.from_documents() above already indexes the document;
# a second index.insert(document) would store it twice.

local_index_cache["milvus_index"] = index

return f"Successful initialisation for document with id {document_key}"

@mcp.tool()
def search_documents(query: str) -> str:
"""
Searches through previously uploaded and indexed documents using semantic search.

This function retrieves relevant information from documents that have been processed
and added to the vector database. It uses semantic similarity to find content that
best matches the query, rather than simple keyword matching.

Args:
query (str): The search query text used to find relevant information in the indexed documents.

Returns:
str: A string containing the relevant contextual information retrieved from the documents
that best matches the query.

Example:
search_documents("What are the main findings about climate change?")
"""
if "milvus_index" not in local_index_cache:
    raise ValueError(
        "No indexed documents found. Run export_docling_document_to_vector_db first."
    )

index = local_index_cache["milvus_index"]

query_engine = index.as_query_engine()
response = query_engine.query(query)

return response.response
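
Outside the MCP server, the same index-then-query round trip can be exercised directly with llama-index. A minimal sketch, assuming the RAG environment variables are set so the guarded names in `docling_mcp/shared.py` exist; the demo document and query are illustrative:

```python
import json

from docling_core.types.doc.document import DoclingDocument
from docling_core.types.doc.labels import DocItemLabel
from llama_index.core import Document, StorageContext, VectorStoreIndex

from docling_mcp.shared import milvus_vector_store, node_parser

# DoclingNodeParser expects the Document text to be a JSON-serialized
# DoclingDocument, exactly as export_docling_document_to_vector_db builds it.
dl_doc = DoclingDocument(name="demo")
dl_doc.add_text(
    label=DocItemLabel.TEXT,
    text="Docling converts PDFs into structured documents.",
)

document = Document(
    text=json.dumps(dl_doc.export_to_dict()),
    metadata={"filename": dl_doc.name},
)

index = VectorStoreIndex.from_documents(
    documents=[document],
    transformations=[node_parser],
    storage_context=StorageContext.from_defaults(vector_store=milvus_vector_store),
)

# Query it back, as search_documents() does.
print(index.as_query_engine().query("What does Docling do?").response)
```
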
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -29,9 +29,18 @@ requires-python = ">=3.10"
dependencies = [
"docling~=2.25",
"httpx>=0.28.1",
"llama-index-core>=0.12.28",
"llama-index-embeddings-huggingface>=0.5.2",
"llama-index-embeddings-openai>=0.3.1",
"llama-index-llms-ollama>=0.5.4",
"llama-index-node-parser-docling>=0.3.1",
"llama-index-readers-docling>=0.3.2",
"llama-index-readers-file>=0.4.7",
"llama-index-vector-stores-milvus>=0.7.2",
"mcp[cli]>=1.4.0",
"pydantic~=2.10",
"pydantic-settings~=2.4",
"python-dotenv>=1.1.0",
]

[project.optional-dependencies]