Separated and cleaned references, removed images from report

This commit is contained in:
Viswamedha Nalabotu 2026-03-15 21:11:37 +00:00
parent 5fca572263
commit 33e6422f08
7 changed files with 166 additions and 104 deletions

4
report/.gitignore vendored
View file

@ -4,4 +4,6 @@
*.out *.out
*.toc *.toc
*.pdf *.pdf
*.gz *.gz
*.bbl
*.blg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 316 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 76 KiB

143
report/references.bib Normal file
View file

@ -0,0 +1,143 @@
@misc{anthropic2024mcp,
author = {{Anthropic}},
title = {{Model Context Protocol (MCP) Specification}},
year = {2024},
howpublished = {\url{https://modelcontextprotocol.io}},
note = {Accessed: 2026-03-09}
}
@misc{huggingface2024mcp,
author = {{Hugging Face}},
title = {{Introduction to Model Context Protocol (MCP)}},
year = {2024},
howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}},
note = {Accessed: 2026-03-09}
}
@misc{langgraph2024,
author = {{LangChain}},
title = {{LangGraph: Building Stateful, Multi-agent Applications with LLMs}},
year = {2024},
howpublished = {\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}},
note = {Accessed: 2026-03-09}
}
@misc{meta2024llama3,
author = {{Meta AI}},
title = {{Llama 3: Open-weight Large Language Models}},
year = {2024},
howpublished = {\url{https://llama.meta.com/llama3/}},
note = {Accessed: 2026-03-09}
}
@misc{pgvector2024,
author = {Kane, Andrew},
title = {{pgvector: Open-source Vector Similarity Search for PostgreSQL}},
year = {2024},
howpublished = {\url{https://github.com/pgvector/pgvector}},
note = {Accessed: 2026-03-09}
}
@misc{pinecone2023rag,
author = {{Pinecone}},
title = {{Retrieval Augmented Generation (RAG) and Semantic Search}},
year = {2023},
howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}},
note = {Accessed: 2026-03-09}
}
@misc{dettmers2023bitsandbytes,
author = {Dettmers, Tim},
title = {{4-bit Quantization and Bitsandbytes for LLMs}},
year = {2023},
howpublished = {\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes}},
note = {Accessed: 2026-03-09}
}
@misc{vllm2024,
author = {{vLLM Team}},
title = {{High-Throughput Serving with PagedAttention}},
year = {2024},
howpublished = {\url{https://vllm.ai}},
note = {Accessed: 2026-03-09}
}
@misc{channels2024docs,
author = {{Django Software Foundation}},
title = {{Django Channels Documentation}},
year = {2024},
howpublished = {\url{https://channels.readthedocs.io/en/stable/}},
note = {Accessed: 2026-03-09}
}
@misc{django2024docs,
author = {{Django Software Foundation}},
title = {{Django Documentation}},
year = {2024},
howpublished = {\url{https://docs.djangoproject.com/}},
note = {Accessed: 2026-03-09}
}
@misc{drf2024docs,
author = {{Encode OSS}},
title = {{Django REST framework Documentation}},
year = {2024},
howpublished = {\url{https://www.django-rest-framework.org/}},
note = {Accessed: 2026-03-09}
}
@misc{celery2024docs,
author = {{Celery Project}},
title = {{Celery Documentation}},
year = {2024},
howpublished = {\url{https://docs.celeryq.dev/}},
note = {Accessed: 2026-03-09}
}
@misc{redis2024docs,
author = {{Redis Ltd.}},
title = {{Redis Documentation}},
year = {2024},
howpublished = {\url{https://redis.io/docs/}},
note = {Accessed: 2026-03-09}
}
@misc{fastapi2024docs,
author = {{FastAPI}},
title = {{FastAPI Documentation}},
year = {2024},
howpublished = {\url{https://fastapi.tiangolo.com/}},
note = {Accessed: 2026-03-09}
}
@misc{sbert2024docs,
author = {{UKPLab / SBERT}},
title = {{Sentence-Transformers Documentation}},
year = {2024},
howpublished = {\url{https://www.sbert.net/}},
note = {Accessed: 2026-03-09}
}
@misc{llamacpp2024,
author = {{ggml-org}},
title = {{llama.cpp Documentation}},
year = {2024},
howpublished = {\url{https://github.com/ggml-org/llama.cpp}},
note = {Accessed: 2026-03-09}
}
@misc{llamacpppython2024,
author = {Abetlen},
title = {{llama-cpp-python Documentation}},
year = {2024},
howpublished = {\url{https://github.com/abetlen/llama-cpp-python}},
note = {Accessed: 2026-03-09}
}
@misc{pytorch2024docs,
author = {{PyTorch Team}},
title = {{PyTorch Documentation}},
year = {2024},
howpublished = {\url{https://pytorch.org/docs/}},
note = {Accessed: 2026-03-09}
}

View file

@ -2,7 +2,7 @@
\usepackage[utf8]{inputenc} \usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc} \usepackage[T1]{fontenc}
\usepackage{lmodern} \usepackage{lmodern}
\usepackage[a4paper,margin=1in]{geometry} \usepackage[a4paper,margin=0.75in]{geometry}
\usepackage{longtable} \usepackage{longtable}
\usepackage{booktabs} \usepackage{booktabs}
\usepackage{array} \usepackage{array}
@ -13,6 +13,7 @@
\usepackage[hidelinks]{hyperref} \usepackage[hidelinks]{hyperref}
\usepackage{tabularx} \usepackage{tabularx}
\usepackage{xurl} \usepackage{xurl}
\usepackage[numbers,sort&compress]{natbib}
% Report-style paragraph spacing % Report-style paragraph spacing
\setlength{\parindent}{0pt} \setlength{\parindent}{0pt}
@ -61,9 +62,9 @@ User & j.thompson@example.com & password \\
\end{tabular} \end{tabular}
\end{center} \end{center}
\textit{Note: I will try to keep the public website available, but the GPU node \textit{Note: The public site should always be available, but the GPU node
runs on my home PC and may occasionally go offline. For reliable testing, runs on my PC and can go offline. For reliable testing,
I recommend running the system locally on a machine with a CUDA-enabled GPU.} I recommend running my development compose stack on a machine with a CUDA-enabled GPU.}
Manager registration code (for signup): \texttt{MANAGER2026} Manager registration code (for signup): \texttt{MANAGER2026}
@ -191,7 +192,7 @@ contextual reasoning, and adaptive response generation, making them
well-suited for interactive, role-aware training scenarios. Unlike well-suited for interactive, role-aware training scenarios. Unlike
static documentation, LLM-driven systems can dynamically tailor static documentation, LLM-driven systems can dynamically tailor
explanations and guidance based on a user's specific role and prior explanations and guidance based on a user's specific role and prior
knowledge. knowledge~\cite{meta2024llama3,langgraph2024}.
Rather than relying on a monolithic chatbot, Dynavera employs a Rather than relying on a monolithic chatbot, Dynavera employs a
collection of specialized, collaborating agents. This modular approach collection of specialized, collaborating agents. This modular approach
@ -254,13 +255,13 @@ enable scalable, context-aware onboarding:
objectives that exceed the capability of a single monolithic model. objectives that exceed the capability of a single monolithic model.
Within Dynavera, this enables separation of instructional delivery, Within Dynavera, this enables separation of instructional delivery,
contextual reasoning, knowledge retrieval, and evaluation, improving contextual reasoning, knowledge retrieval, and evaluation, improving
modularity, explainability, and system adaptability. modularity, explainability, and system adaptability~\cite{langgraph2024}.
\item \item
Retrieval-Augmented Generation (RAG): Training responses are grounded Retrieval-Augmented Generation (RAG): Training responses are grounded
in authoritative, organization-specific documentation rather than in authoritative, organization-specific documentation rather than
relying solely on a model's parametric knowledge. This ensures factual relying solely on a model's parametric knowledge. This ensures factual
accuracy, contextual relevance, and rapid adaptability as accuracy, contextual relevance, and rapid adaptability as
organizational knowledge evolves. organizational knowledge evolves~\cite{pinecone2023rag}.
\end{itemize} \end{itemize}
To address data privacy and deployment constraints, Dynavera prioritizes To address data privacy and deployment constraints, Dynavera prioritizes
@ -268,7 +269,7 @@ local inference using quantized open-weight models (e.g., Llama 3 in
GGUF format). This design choice reduces dependency on external cloud GGUF format). This design choice reduces dependency on external cloud
APIs, supports offline or air-gapped environments, and aligns with APIs, supports offline or air-gapped environments, and aligns with
enterprise privacy requirements while maintaining acceptable inference enterprise privacy requirements while maintaining acceptable inference
performance. performance~\cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}.
\subsection{Positioning Against Alternative \subsection{Positioning Against Alternative
Approaches}\label{positioning-against-alternative-approaches} Approaches}\label{positioning-against-alternative-approaches}
@ -371,11 +372,13 @@ MCP Router & Python & Provides a standardized interface for agents to query data
This stack was selected to balance modularity, rapid iteration, and production readiness. This stack was selected to balance modularity, rapid iteration, and production readiness.
A decoupled frontend-backend architecture lets the UI and API evolve independently, while PostgreSQL A decoupled frontend-backend architecture lets the UI and API evolve independently, while PostgreSQL
with pgvector provides one ACID-compliant store for both relational state and vector retrieval. with pgvector provides one ACID-compliant store for both relational state and vector
retrieval~\cite{django2024docs,drf2024docs,pgvector2024}.
To preserve performance and control, orchestration is implemented in native Python rather than heavier To preserve performance and control, orchestration is implemented in native Python rather than heavier
framework abstractions such as LangChain. This keeps agent state handling explicit, reduces latency in the WebSocket loop, framework abstractions such as LangChain. This keeps agent state handling explicit, reduces latency in the WebSocket loop,
and supports local execution, data ownership, and architectural transparency during early-stage development. and supports local execution, data ownership, and architectural transparency during early-stage
development~\cite{langgraph2024,channels2024docs}.
\subsection{Design Philosophy: The Distributed Agentic \subsection{Design Philosophy: The Distributed Agentic
Pattern}\label{design-philosophy-the-distributed-agentic-pattern} Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
@ -383,7 +386,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
Dynavera leverages the Model Context Protocol (MCP) to solve the Dynavera leverages the Model Context Protocol (MCP) to solve the
``context gap'' in corporate onboarding. Rather than providing the LLM ``context gap'' in corporate onboarding. Rather than providing the LLM
with a static, bloated prompt, the system utilizes a Sidecar Tooling with a static, bloated prompt, the system utilizes a Sidecar Tooling
approach: approach~\cite{anthropic2024mcp,huggingface2024mcp}:
\begin{itemize} \begin{itemize}
\item \item
@ -437,13 +440,13 @@ while orchestration-time interaction uses Django Channels over
WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This
allows the platform to handle both CRUD-style workflows and allows the platform to handle both CRUD-style workflows and
long-running, stateful agent interactions without forcing either pattern long-running, stateful agent interactions without forcing either pattern
into the other. into the other~\cite{drf2024docs,channels2024docs}.
For ingestion, the backend follows an asynchronous execution path: For ingestion, the backend follows an asynchronous execution path:
uploaded files are stored as TrainingFile records, and a post-save uploaded files are stored as TrainingFile records, and a post-save
trigger enqueues background processing through Celery (Redis broker). trigger enqueues background processing through Celery (Redis broker).
This prevents heavy preprocessing from blocking request-response latency This prevents heavy preprocessing from blocking request-response latency
on the main web process. on the main web process~\cite{celery2024docs,redis2024docs}.
Persistence is model-driven and traceable. Session state, progress, Persistence is model-driven and traceable. Session state, progress,
generated onboarding structures, and interaction events are stored in generated onboarding structures, and interaction events are stored in
@ -480,14 +483,14 @@ batches long content, and calls the GPU service at /v1/semantic-chunk.
The service performs sentence-level semantic breakpoint detection using The service performs sentence-level semantic breakpoint detection using
embedding-distance thresholds, then returns coherent chunks with embedding-distance thresholds, then returns coherent chunks with
embeddings. This avoids naive fixed-size splits that can break context embeddings. This avoids naive fixed-size splits that can break context
mid-concept. mid-concept~\cite{sbert2024docs,fastapi2024docs}.
\underline{Vector storage and retrieval with pgvector}\\ \underline{Vector storage and retrieval with pgvector}\\
Returned chunk embeddings are stored in RoleRagDocument.embedding (768 Returned chunk embeddings are stored in RoleRagDocument.embedding (768
dimensions) in PostgreSQL using pgvector, linked relationally to role dimensions) in PostgreSQL using pgvector, linked relationally to role
and source file metadata. Retrieval is performed in SQL using and source file metadata. Retrieval is performed in SQL using
cosine-distance ranking and top-k selection, allowing role filtering and cosine-distance ranking and top-k selection, allowing role filtering and
similarity search in one query path. similarity search in one query path~\cite{pgvector2024}.
\subsubsection{Agent Orchestration Workflow \subsubsection{Agent Orchestration Workflow
(Simplified)}\label{agent-orchestration-workflow-simplified} (Simplified)}\label{agent-orchestration-workflow-simplified}
@ -645,95 +648,9 @@ practical manner. While this project serves as a proof-of-concept, the
modular nature of the specialist agents provides a clear path for future modular nature of the specialist agents provides a clear path for future
expansion into more nuanced, multi-modal onboarding scenarios. expansion into more nuanced, multi-modal onboarding scenarios.
\begin{figure*}[b]
\centering
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/home-page.png}
\caption{Home page of Dynavera.}
\end{figure*}
\begin{figure*}[b]
\centering
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/organization-page.png}
\caption{Organization management view.}
\end{figure*}
\begin{figure*}[b]
\centering
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-loading-page.png}
\caption{Onboarding generation/loading state.}
\end{figure*}
\begin{figure*}[b]
\centering
\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-content-page.png}
\caption{Onboarding content delivery view.}
\end{figure*}
\section{References}\label{references} \section{References}\label{references}
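\renewcommand{\bibsection}{}% the \section{References} heading above replaces natbib's own unnumbered heading
\bibliographystyle{unsrtnat}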
\begin{itemize} \bibliography{references}
\item
Anthropic (2024). Model Context Protocol (MCP) Specification.
Available at: \url{https://modelcontextprotocol.io} (Accessed: 9 March
2026).
\item
Hugging Face (2024). Introduction to Model Context Protocol (MCP).
Available at:
\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}
(Accessed: 9 March 2026).
\item
LangChain (2024). LangGraph: Building Stateful, Multi-agent
Applications with LLMs. Available at:
\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}
(Accessed: 9 March 2026).
\item
Meta AI (2024). Llama 3: Open-weight Large Language Models. Available
at: \url{https://llama.meta.com/llama3/} (Accessed: 9 March 2026).
\item
PostgreSQL Global Development Group (2024). pgvector: Open-source
vector similarity search for PostgreSQL. Available at:
\url{https://github.com/pgvector/pgvector} (Accessed: 9 March 2026).
\item
Pinecone (2023). Retrieval Augmented Generation (RAG) and Semantic
Search. Available at:
\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}
(Accessed: 9 March 2026).
\item
Dettmers, T. (2023). 4-bit Quantization and Bitsandbytes for LLMs.
Available at:
\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes} (Accessed:
9 March 2026).
\item
vLLM Team (2024). High-Throughput Serving with PagedAttention.
Available at: \url{https://vllm.ai} (Accessed: 9 March 2026).
\item
Django Software Foundation (2024). Django Channels: Real-time
WebSockets for Python. Available at:
\url{https://channels.readthedocs.io/en/stable/} (Accessed: 9 March 2026).
\item
Django Software Foundation (2024). Django Documentation.
Available at: \url{https://docs.djangoproject.com/} (Accessed: 9 March 2026).
\item
Encode OSS (2024). Django REST framework Documentation.
Available at: \url{https://www.django-rest-framework.org/} (Accessed: 9 March 2026).
\item
Celery Project (2024). Celery Documentation. Available at: \url{https://docs.celeryq.dev/} (Accessed: 9 March 2026).
\item
Redis Ltd. (2024). Redis Documentation. Available at: \url{https://redis.io/docs/} (Accessed: 9 March 2026).
\item
FastAPI (2024). FastAPI Documentation. Available at: \url{https://fastapi.tiangolo.com/} (Accessed: 9 March 2026).
\item
UKPLab / SBERT (2024). Sentence-Transformers Documentation.
Available at: \url{https://www.sbert.net/} (Accessed: 9 March 2026).
\item
Abetlen (2024). llama-cpp-python Documentation.
Available at: \url{https://github.com/abetlen/llama-cpp-python} (Accessed: 9 March 2026).
\item
ggml-org (2024). llama.cpp Documentation.
Available at: \url{https://github.com/ggml-org/llama.cpp} (Accessed: 9 March 2026).
\item
PyTorch Team (2024). PyTorch Documentation. Available at: \url{https://pytorch.org/docs/} (Accessed: 9 March 2026).
\end{itemize}
\end{document} \end{document}