Separated and cleaned references, removed images from report
This commit is contained in:
parent
5fca572263
commit
33e6422f08
7 changed files with 166 additions and 104 deletions
4
report/.gitignore
vendored
4
report/.gitignore
vendored
|
|
@ -4,4 +4,6 @@
|
|||
*.out
|
||||
*.toc
|
||||
*.pdf
|
||||
*.gz
|
||||
*.gz
|
||||
*.bbl
|
||||
*.blg
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 316 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 113 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 53 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 76 KiB |
143
report/references.bib
Normal file
143
report/references.bib
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
@misc{anthropic2024mcp,
|
||||
author = {{Anthropic}},
|
||||
title = {{Model Context Protocol} ({MCP}) Specification},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://modelcontextprotocol.io}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{huggingface2024mcp,
|
||||
author = {{Hugging Face}},
|
||||
title = {Introduction to {Model Context Protocol} ({MCP})},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{langgraph2024,
|
||||
author = {{LangChain}},
|
||||
title = {{LangGraph}: Building Stateful, Multi-agent Applications with {LLMs}},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{meta2024llama3,
|
||||
author = {{Meta AI}},
|
||||
title = {Llama 3: Open-weight Large Language Models},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://llama.meta.com/llama3/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{pgvector2024,
|
||||
author = {{PostgreSQL Global Development Group}},
|
||||
title = {pgvector: Open-source Vector Similarity Search for {PostgreSQL}},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://github.com/pgvector/pgvector}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{pinecone2023rag,
|
||||
author = {{Pinecone}},
|
||||
title = {Retrieval Augmented Generation ({RAG}) and Semantic Search},
|
||||
year = {2023},
|
||||
howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{dettmers2023bitsandbytes,
|
||||
author = {Dettmers, Tim},
|
||||
title = {4-bit Quantization and Bitsandbytes for {LLMs}},
|
||||
year = {2023},
|
||||
howpublished = {\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{vllm2024,
|
||||
author = {{vLLM Team}},
|
||||
title = {High-Throughput Serving with {PagedAttention}},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://vllm.ai}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{channels2024docs,
|
||||
author = {{Django Software Foundation}},
|
||||
title = {{Django Channels} Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://channels.readthedocs.io/en/stable/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{django2024docs,
|
||||
author = {{Django Software Foundation}},
|
||||
title = {Django Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://docs.djangoproject.com/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{drf2024docs,
|
||||
author = {{Encode OSS}},
|
||||
title = {{Django REST} Framework Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://www.django-rest-framework.org/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{celery2024docs,
|
||||
author = {{Celery Project}},
|
||||
title = {Celery Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://docs.celeryq.dev/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{redis2024docs,
|
||||
author = {{Redis Ltd.}},
|
||||
title = {Redis Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://redis.io/docs/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{fastapi2024docs,
|
||||
author = {{FastAPI}},
|
||||
title = {{FastAPI} Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://fastapi.tiangolo.com/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{sbert2024docs,
|
||||
author = {{UKPLab / SBERT}},
|
||||
title = {Sentence-Transformers Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://www.sbert.net/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{llamacpp2024,
|
||||
author = {{ggml-org}},
|
||||
title = {llama.cpp Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://github.com/ggml-org/llama.cpp}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{llamacpppython2024,
|
||||
author = {Abetlen},
|
||||
title = {llama-cpp-python Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://github.com/abetlen/llama-cpp-python}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
||||
@misc{pytorch2024docs,
|
||||
author = {{PyTorch Team}},
|
||||
title = {{PyTorch} Documentation},
|
||||
year = {2024},
|
||||
howpublished = {\url{https://pytorch.org/docs/}},
|
||||
note = {Accessed: 2026-03-09}
|
||||
}
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{lmodern}
|
||||
\usepackage[a4paper,margin=1in]{geometry}
|
||||
\usepackage[a4paper,margin=0.75in]{geometry}
|
||||
\usepackage{longtable}
|
||||
\usepackage{booktabs}
|
||||
\usepackage{array}
|
||||
|
|
@ -13,6 +13,7 @@
|
|||
\usepackage[hidelinks]{hyperref}
|
||||
\usepackage{tabularx}
|
||||
\usepackage{xurl}
|
||||
\usepackage[numbers,sort&compress]{natbib}
|
||||
|
||||
% Report-style paragraph spacing
|
||||
\setlength{\parindent}{0pt}
|
||||
|
|
@ -61,9 +62,9 @@ User & j.thompson@example.com & password \\
|
|||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
\textit{Note: I will try to keep the public website available, but the GPU node
|
||||
runs on my home PC and may occasionally go offline. For reliable testing,
|
||||
I recommend running the system locally on a machine with a CUDA-enabled GPU.}
|
||||
\textit{Note: The public site should always be available, but the GPU node
|
||||
runs on my PC and can go offline. For reliable testing,
|
||||
I recommend running my development compose stack on a machine with a CUDA-enabled GPU.}
|
||||
|
||||
Manager registration code (for signup): \texttt{MANAGER2026}
|
||||
|
||||
|
|
@ -191,7 +192,7 @@ contextual reasoning, and adaptive response generation, making them
|
|||
well-suited for interactive, role-aware training scenarios. Unlike
|
||||
static documentation, LLM-driven systems can dynamically tailor
|
||||
explanations and guidance based on a user's specific role and prior
|
||||
knowledge.
|
||||
knowledge~\cite{meta2024llama3,langgraph2024}.
|
||||
|
||||
Rather than relying on a monolithic chatbot, Dynavera employs a
|
||||
collection of specialized, collaborating agents. This modular approach
|
||||
|
|
@ -254,13 +255,13 @@ enable scalable, context-aware onboarding:
|
|||
objectives that exceed the capability of a single monolithic model.
|
||||
Within Dynavera, this enables separation of instructional delivery,
|
||||
contextual reasoning, knowledge retrieval, and evaluation, improving
|
||||
modularity, explainability, and system adaptability.
|
||||
modularity, explainability, and system adaptability~\cite{langgraph2024}.
|
||||
\item
|
||||
Retrieval-Augmented Generation (RAG): Training responses are grounded
|
||||
in authoritative, organization-specific documentation rather than
|
||||
relying solely on a model's parametric knowledge. This ensures factual
|
||||
accuracy, contextual relevance, and rapid adaptability as
|
||||
organizational knowledge evolves.
|
||||
organizational knowledge evolves~\cite{pinecone2023rag}.
|
||||
\end{itemize}
|
||||
|
||||
To address data privacy and deployment constraints, Dynavera prioritizes
|
||||
|
|
@ -268,7 +269,7 @@ local inference using quantized open-weight models (e.g., Llama 3 in
|
|||
GGUF format). This design choice reduces dependency on external cloud
|
||||
APIs, supports offline or air-gapped environments, and aligns with
|
||||
enterprise privacy requirements while maintaining acceptable inference
|
||||
performance.
|
||||
performance~\cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}.
|
||||
|
||||
\subsection{Positioning Against Alternative
|
||||
Approaches}\label{positioning-against-alternative-approaches}
|
||||
|
|
@ -371,11 +372,13 @@ MCP Router & Python & Provides a standardized interface for agents to query data
|
|||
|
||||
This stack was selected to balance modularity, rapid iteration, and production readiness.
|
||||
A decoupled frontend-backend architecture lets the UI and API evolve independently, while PostgreSQL
|
||||
with pgvector provides one ACID-compliant store for both relational state and vector retrieval.
|
||||
with pgvector provides one ACID-compliant store for both relational state and vector retrieval
|
||||
\cite{django2024docs,drf2024docs,pgvector2024}.
|
||||
|
||||
To preserve performance and control, orchestration is implemented in native Python rather than heavier
|
||||
framework abstractions such as LangChain. This keeps agent state handling explicit, reduces latency in the WebSocket loop,
|
||||
and supports local execution, data ownership, and architectural transparency during early-stage development.
|
||||
and supports local execution, data ownership, and architectural transparency during early-stage development
|
||||
\cite{langgraph2024,channels2024docs}.
|
||||
|
||||
\subsection{Design Philosophy: The Distributed Agentic
|
||||
Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
|
||||
|
|
@ -383,7 +386,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
|
|||
Dynavera leverages the Model Context Protocol (MCP) to solve the
|
||||
``context gap'' in corporate onboarding. Rather than providing the LLM
|
||||
with a static, bloated prompt, the system utilizes a Sidecar Tooling
|
||||
approach:
|
||||
approach~\cite{anthropic2024mcp,huggingface2024mcp}:
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
|
|
@ -437,13 +440,13 @@ while orchestration-time interaction uses Django Channels over
|
|||
WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This
|
||||
allows the platform to handle both CRUD-style workflows and
|
||||
long-running, stateful agent interactions without forcing either pattern
|
||||
into the other.
|
||||
into the other~\cite{drf2024docs,channels2024docs}.
|
||||
|
||||
For ingestion, the backend follows an asynchronous execution path:
|
||||
uploaded files are stored as TrainingFile records, and a post-save
|
||||
trigger enqueues background processing through Celery (Redis broker).
|
||||
This prevents heavy preprocessing from blocking request-response latency
|
||||
on the main web process.
|
||||
on the main web process~\cite{celery2024docs,redis2024docs}.
|
||||
|
||||
Persistence is model-driven and traceable. Session state, progress,
|
||||
generated onboarding structures, and interaction events are stored in
|
||||
|
|
@ -480,14 +483,14 @@ batches long content, and calls the GPU service at /v1/semantic-chunk.
|
|||
The service performs sentence-level semantic breakpoint detection using
|
||||
embedding-distance thresholds, then returns coherent chunks with
|
||||
embeddings. This avoids naive fixed-size splits that can break context
|
||||
mid-concept.
|
||||
mid-concept~\cite{sbert2024docs,fastapi2024docs}.
|
||||
|
||||
\underline{Vector storage and retrieval with pgvector}\\
|
||||
Returned chunk embeddings are stored in RoleRagDocument.embedding (768
|
||||
dimensions) in PostgreSQL using pgvector, linked relationally to role
|
||||
and source file metadata. Retrieval is performed in SQL using
|
||||
cosine-distance ranking and top-k selection, allowing role filtering and
|
||||
similarity search in one query path.
|
||||
similarity search in one query path~\cite{pgvector2024}.
|
||||
|
||||
\subsubsection{Agent Orchestration Workflow
|
||||
(Simplified)}\label{agent-orchestration-workflow-simplified}
|
||||
|
|
@ -645,95 +648,9 @@ practical manner. While this project serves as a proof-of-concept, the
|
|||
modular nature of the specialist agents provides a clear path for future
|
||||
expansion into more nuanced, multi-modal onboarding scenarios.
|
||||
|
||||
\begin{figure*}[b]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/home-page.png}
|
||||
\caption{Home page of Dynavera.}
|
||||
\end{figure*}
|
||||
|
||||
\begin{figure*}[b]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/organization-page.png}
|
||||
\caption{Organization management view.}
|
||||
\end{figure*}
|
||||
|
||||
\begin{figure*}[b]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-loading-page.png}
|
||||
\caption{Onboarding generation/loading state.}
|
||||
\end{figure*}
|
||||
|
||||
\begin{figure*}[b]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-content-page.png}
|
||||
\caption{Onboarding content delivery view.}
|
||||
\end{figure*}
|
||||
|
||||
\section{References}\label{references}
|
||||
|
||||
\begin{itemize}
|
||||
\item
|
||||
Anthropic (2024). Model Context Protocol (MCP) Specification.
|
||||
Available at: \url{https://modelcontextprotocol.io} (Accessed: 9 March
|
||||
2026).
|
||||
\item
|
||||
Hugging Face (2024). Introduction to Model Context Protocol (MCP).
|
||||
Available at:
|
||||
\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}
|
||||
(Accessed: 9 March 2026).
|
||||
\item
|
||||
LangChain (2024). LangGraph: Building Stateful, Multi-agent
|
||||
Applications with LLMs. Available at:
|
||||
\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}
|
||||
(Accessed: 9 March 2026).
|
||||
\item
|
||||
Meta AI (2024). Llama 3: Open-weight Large Language Models. Available
|
||||
at: \url{https://llama.meta.com/llama3/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
PostgreSQL Global Development Group (2024). pgvector: Open-source
|
||||
vector similarity search for PostgreSQL. Available at:
|
||||
\url{https://github.com/pgvector/pgvector} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Pinecone (2023). Retrieval Augmented Generation (RAG) and Semantic
|
||||
Search. Available at:
|
||||
\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}
|
||||
(Accessed: 9 March 2026).
|
||||
\item
|
||||
Dettmers, T. (2023). 4-bit Quantization and Bitsandbytes for LLMs.
|
||||
Available at:
|
||||
\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes} (Accessed:
|
||||
9 March 2026).
|
||||
\item
|
||||
vLLM Team (2024). High-Throughput Serving with PagedAttention.
|
||||
Available at: \url{https://vllm.ai} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Django Software Foundation (2024). Django Channels: Real-time
|
||||
WebSockets for Python. Available at:
|
||||
\url{https://channels.readthedocs.io/en/stable/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Django Software Foundation (2024). Django Documentation.
|
||||
Available at: \url{https://docs.djangoproject.com/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Encode OSS (2024). Django REST framework Documentation.
|
||||
Available at: \url{https://www.django-rest-framework.org/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Celery Project (2024). Celery Documentation. Available at: \url{https://docs.celeryq.dev/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Redis Ltd. (2024). Redis Documentation. Available at: \url{https://redis.io/docs/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
FastAPI (2024). FastAPI Documentation. Available at: \url{https://fastapi.tiangolo.com/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
UKPLab / SBERT (2024). Sentence-Transformers Documentation.
|
||||
Available at: \url{https://www.sbert.net/} (Accessed: 9 March 2026).
|
||||
\item
|
||||
Abetlen (2024). llama-cpp-python Documentation.
|
||||
Available at: \url{https://github.com/abetlen/llama-cpp-python} (Accessed: 9 March 2026).
|
||||
\item
|
||||
ggml-org (2024). llama.cpp Documentation.
|
||||
Available at: \url{https://github.com/ggml-org/llama.cpp} (Accessed: 9 March 2026).
|
||||
\item
|
||||
PyTorch Team (2024). PyTorch Documentation. Available at: \url{https://pytorch.org/docs/} (Accessed: 9 March 2026).
|
||||
\end{itemize}
|
||||
\bibliographystyle{unsrtnat}
|
||||
\bibliography{references}
|
||||
|
||||
\end{document}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue