Vector search is a technique to find similar data points based on their numerical representations, known as embeddings, in a multi-dimensional space. Instead of performing keyword-based search, vector search relies on semantic similarity.
For a detailed explanation and the full list of capabilities, see: Oracle AI Vector Search.
RAG, or Retrieval-Augmented Generation, is a technique that enhances LLM (Large Language Model) responses by dynamically retrieving relevant data from an external source — such as a vector database like Oracle Database 23ai — before generating an answer. Instead of relying solely on pre-trained knowledge, RAG first performs a vector search to find the most relevant documents and then uses that context to generate a more accurate and informed response. This makes RAG particularly useful for applications like chatbots, search engines, and AI-driven assistants that require up-to-date or domain-specific knowledge.
Here’s a basic representation:
[Knowledge Base] → [Retrieval] → [LLM] → [Generation] → [Output]
# Download the articles dataset (saved locally as train.csv) from Hugging Face.
curl -L -o train.csv "https://huggingface.co/datasets/prithivMLmods/Content-Articles/resolve/main/datasets/train.csv"
# Unpack the pre-built ONNX embedding model (all-MiniLM-L12-v2, augmented for in-database use).
unzip all-MiniLM-L12-v2_augmented.zip
-- One-time privileged setup: connect as SYSDBA and switch into the target PDB.
sqlplus / as sysdba
ALTER SESSION SET CONTAINER=<pdb>;
-- CREATE MINING MODEL is required to load/import ONNX models into the schema.
GRANT DB_DEVELOPER_ROLE, CREATE MINING MODEL TO <dbuser>;
-- Directory object pointing at the filesystem path that holds the .onnx file.
CREATE OR REPLACE DIRECTORY DM_DUMP AS '<directory_path>';
GRANT READ ON DIRECTORY DM_DUMP TO <dbuser>;
GRANT WRITE ON DIRECTORY DM_DUMP TO <dbuser>;
EXIT;
-- Reconnect as the application user for all remaining steps.
sqlplus <dbuser>/<password>@<pdb>
-- Drop any previous copy of the model; force => true ignores "model not found".
EXEC DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'ALL_MINILM_L12_V2', force => true);
-- Load the ONNX embedding model from the DM_DUMP directory into the database
-- under the name ALL_MINILM_L12_V2.
-- The metadata JSON declares the model function ("embedding"), names the
-- output attribute, and maps the model's "input" tensor to the DATA column
-- used later in VECTOR_EMBEDDING(... USING ... AS DATA).
BEGIN
DBMS_VECTOR.LOAD_ONNX_MODEL(
directory => 'DM_DUMP',
file_name => 'all_MiniLM_L12_v2.onnx',
model_name => 'ALL_MINILM_L12_V2',
metadata => JSON('{"function" : "embedding", "embeddingOutput" : "embedding", "input": {"input": ["DATA"]}}'));
END;
/
-- Confirm the model is registered as a mining model in the current schema.
SELECT model_name, algorithm, mining_function
FROM user_mining_models
WHERE model_name='ALL_MINILM_L12_V2';
-- Smoke test: embed a sample sentence (Oracle 23ai allows SELECT without FROM).
SELECT VECTOR_EMBEDDING(ALL_MINILM_L12_V2 USING 'The quick brown fox jumped' AS DATA) AS embedding;
-- Drop the model again so the alternative BLOB-based import can be demonstrated.
EXEC DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'ALL_MINILM_L12_V2', force => true);
-- Alternative to DBMS_VECTOR.LOAD_ONNX_MODEL: stage the ONNX file into a
-- temporary BLOB and hand it to DBMS_DATA_MINING.IMPORT_ONNX_MODEL with the
-- same embedding metadata.
DECLARE
  m_blob    BLOB DEFAULT EMPTY_BLOB();
  m_src_loc BFILE;
BEGIN
  DBMS_LOB.CREATETEMPORARY(m_blob, FALSE);
  m_src_loc := BFILENAME('DM_DUMP', 'all_MiniLM_L12_v2.onnx');
  DBMS_LOB.FILEOPEN(m_src_loc, DBMS_LOB.FILE_READONLY);
  DBMS_LOB.LOADFROMFILE(m_blob, m_src_loc, DBMS_LOB.GETLENGTH(m_src_loc));
  -- FILECLOSE is the documented counterpart of FILEOPEN (the original called
  -- DBMS_LOB.CLOSE, which pairs with DBMS_LOB.OPEN).
  DBMS_LOB.FILECLOSE(m_src_loc);
  DBMS_DATA_MINING.IMPORT_ONNX_MODEL(
      'ALL_MINILM_L12_V2',
      m_blob,
      JSON('{"function":"embedding", "embeddingOutput":"embedding", "input":{"input": ["DATA"]}}'));
  DBMS_LOB.FREETEMPORARY(m_blob);
EXCEPTION
  WHEN OTHERS THEN
    -- Best-effort cleanup so a failed import does not leak the open BFILE
    -- handle or the session-temporary BLOB, then re-raise the error.
    DBMS_LOB.FILECLOSEALL;
    IF DBMS_LOB.ISTEMPORARY(m_blob) = 1 THEN
      DBMS_LOB.FREETEMPORARY(m_blob);
    END IF;
    RAISE;
END;
/
-- SQLcl LOAD command: create and populate CONTENT_ARTICLE from the CSV
-- (the NEW keyword creates the table from the CSV header).
-- NOTE(review): the download step above saves train.csv — confirm that
-- data.csv at this path is the intended file.
LOAD CONTENT_ARTICLE '/Users/mvrajakishore/Downloads/data.csv' NEW;
-- Add a VECTOR column to hold one embedding per article.
-- NOTE(review): dimension/format are left flexible here; VECTOR(384, FLOAT32)
-- would pin it to all-MiniLM-L12-v2 output — confirm before constraining.
ALTER TABLE CONTENT_ARTICLE ADD (
CONTENT_ARTICLE_VECTOR VECTOR
);
-- Intentionally updates EVERY row (no WHERE): embed each abstract with the
-- in-database ONNX model.
UPDATE CONTENT_ARTICLE
SET CONTENT_ARTICLE_VECTOR = VECTOR_EMBEDDING(ALL_MINILM_L12_V2 USING ABSTRACT AS DATA);
COMMIT;
-- Print the titles of the five articles semantically closest to the query.
-- Requires SET SERVEROUTPUT ON to see the DBMS_OUTPUT lines.
DECLARE
  l_query VARCHAR2(100) := 'Deep Learning';

  -- Top-5 nearest abstracts by vector distance to the query embedding
  -- (default distance metric of VECTOR_DISTANCE applies).
  CURSOR c_nearest IS
    SELECT title, abstract
      FROM CONTENT_ARTICLE
     ORDER BY VECTOR_DISTANCE(
                content_article_vector,
                VECTOR_EMBEDDING(ALL_MINILM_L12_V2 USING l_query AS DATA))
     FETCH FIRST 5 ROWS ONLY;
BEGIN
  FOR article IN c_nearest LOOP
    DBMS_OUTPUT.PUT_LINE(article.title);
  END LOOP;
END;
/
-- Same search expressed as plain SQL with a bind variable, using approximate
-- (ANN) fetch so a vector index can be used when one exists.
VARIABLE search_text VARCHAR2(100);
EXEC :search_text := 'Deep Learning';
SELECT VECTOR_DISTANCE(content_article_vector, VECTOR_EMBEDDING(ALL_MINILM_L12_V2 USING :search_text AS DATA)) AS distance,
       title,
       abstract
FROM CONTENT_ARTICLE
-- Order by the alias, not by position: positional ORDER BY (the original's
-- "ORDER BY 1") silently changes meaning if the select list is reordered.
ORDER BY distance
FETCH APPROXIMATE FIRST 5 ROWS ONLY;
- Store text as vector embeddings in a vector database (e.g., Oracle, Pinecone, FAISS).
- Perform a vector similarity search to fetch the most relevant data.
- Enhance LLM responses by adding the retrieved data as context.
- Example:
"Using this context: <retrieved articles>, answer: <user question>"
- The LLM generates responses based on both its knowledge and the retrieved data.
RAG enhances LLM capabilities by combining retrieval + generation dynamically.
# Install the Python client libraries: python-dotenv (env-file loading),
# cohere (LLM API client), oracledb (Oracle Database driver).
pip install python-dotenv cohere oracledb
# Minimal RAG pipeline: retrieve the most similar articles from Oracle 23ai
# via vector search, then ask Cohere to answer using them as context.
import getpass  # NOTE(review): unused in this snippet; kept to avoid breaking other chunks
import os
from dotenv import load_dotenv
import oracledb
import cohere

# Pull credentials from a .env file so secrets stay out of the source.
load_dotenv()
cohere_api = os.environ.get("COHERE_API_KEY")
pw = os.environ.get("PYTHON_PASSWORD")
dbuser = os.environ.get("DB_USER")
conn_str = os.environ.get("CONNECT_STRING")

user_question = "Can you explain Thermodynamics expansion?"

# Retrieval step: top-3 nearest abstracts by vector distance.
# The question text is bound (:query_text), never interpolated into the SQL,
# so the query is safe against SQL injection.
query = """
SELECT title, abstract
FROM CONTENT_ARTICLE
ORDER BY VECTOR_DISTANCE(
    content_article_vector,
    VECTOR_EMBEDDING(ALL_MINILM_L12_V2 USING :query_text AS DATA)
)
FETCH FIRST 3 ROWS ONLY
"""

# Context managers guarantee the cursor and connection are released even if
# the query raises (the original only closed them on the success path).
with oracledb.connect(user=dbuser, password=pw, dsn=conn_str) as connection:
    print("Successfully connected to Oracle Database")
    with connection.cursor() as cursor:
        cursor.execute(query, {'query_text': user_question})
        relevant_articles = cursor.fetchall()

# Augmentation step: fold the retrieved rows into a single context string.
retrieved_context = "\n\n".join(
    f"{title}: {abstract}" for title, abstract in relevant_articles
)

# Generation step: hand the retrieved context plus the question to the LLM.
co = cohere.Client(cohere_api)
if retrieved_context.strip():
    response = co.chat(
        model="command-r-plus",
        message=f"Using this context:\n{retrieved_context}\n\nAnswer the question: {user_question}"
    )
    print(response.text)
else:
    print("No relevant articles found.")