Separated and cleaned references, removed images from report

2026-03-15 21:11:37 +00:00 · 2026-03-15 21:11:37 +00:00 · 33e6422f08
commit 33e6422f08
parent 5fca572263
7 changed files with 166 additions and 104 deletions
--- a/report/.gitignore
+++ b/report/.gitignore
@ -4,4 +4,6 @@
 *.out
 *.toc
 *.pdf
-*.gz
+*.gz
+*.bbl
+*.blg
--- a/report/diagrams/home-page.png
+++ b/report/diagrams/home-page.png
--- a/report/diagrams/onboarding-content-page.png
+++ b/report/diagrams/onboarding-content-page.png
--- a/report/diagrams/onboarding-loading-page.png
+++ b/report/diagrams/onboarding-loading-page.png
--- a/report/diagrams/organization-page.png
+++ b/report/diagrams/organization-page.png
--- a/report/references.bib
+++ b/report/references.bib
@ -0,0 +1,143 @@
+@misc{anthropic2024mcp,
+  author       = {{Anthropic}},
+  title        = {{Model Context Protocol} ({MCP}) Specification},
+  year         = {2024},
+  howpublished = {\url{https://modelcontextprotocol.io}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{huggingface2024mcp,
+  author       = {{Hugging Face}},
+  title        = {Introduction to {Model Context Protocol} ({MCP})},
+  year         = {2024},
+  howpublished = {\url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{langgraph2024,
+  author       = {{LangChain}},
+  title        = {{LangGraph}: Building Stateful, Multi-agent Applications with {LLMs}},
+  year         = {2024},
+  howpublished = {\url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{meta2024llama3,
+  author       = {{Meta AI}},
+  title        = {Llama 3: Open-weight Large Language Models},
+  year         = {2024},
+  howpublished = {\url{https://llama.meta.com/llama3/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{pgvector2024,
+  author       = {Kane, Andrew and others},
+  title        = {pgvector: Open-source Vector Similarity Search for {PostgreSQL}},
+  year         = {2024},
+  howpublished = {\url{https://github.com/pgvector/pgvector}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{pinecone2023rag,
+  author       = {{Pinecone}},
+  title        = {Retrieval Augmented Generation ({RAG}) and Semantic Search},
+  year         = {2023},
+  howpublished = {\url{https://www.pinecone.io/learn/retrieval-augmented-generation/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{dettmers2023bitsandbytes,
+  author       = {Dettmers, Tim},
+  title        = {4-bit Quantization and Bitsandbytes for {LLMs}},
+  year         = {2023},
+  howpublished = {\url{https://huggingface.co/blog/4bit-transformers-bitsandbytes}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{vllm2024,
+  author       = {{vLLM Team}},
+  title        = {High-Throughput Serving with {PagedAttention}},
+  year         = {2024},
+  howpublished = {\url{https://vllm.ai}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{channels2024docs,
+  author       = {{Django Software Foundation}},
+  title        = {{Django Channels} Documentation},
+  year         = {2024},
+  howpublished = {\url{https://channels.readthedocs.io/en/stable/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{django2024docs,
+  author       = {{Django Software Foundation}},
+  title        = {Django Documentation},
+  year         = {2024},
+  howpublished = {\url{https://docs.djangoproject.com/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{drf2024docs,
+  author       = {{Encode OSS}},
+  title        = {{Django REST Framework} Documentation},
+  year         = {2024},
+  howpublished = {\url{https://www.django-rest-framework.org/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{celery2024docs,
+  author       = {{Celery Project}},
+  title        = {Celery Documentation},
+  year         = {2024},
+  howpublished = {\url{https://docs.celeryq.dev/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{redis2024docs,
+  author       = {{Redis Ltd.}},
+  title        = {Redis Documentation},
+  year         = {2024},
+  howpublished = {\url{https://redis.io/docs/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{fastapi2024docs,
+  author       = {{FastAPI}},
+  title        = {{FastAPI} Documentation},
+  year         = {2024},
+  howpublished = {\url{https://fastapi.tiangolo.com/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{sbert2024docs,
+  author       = {{UKPLab / SBERT}},
+  title        = {{Sentence-Transformers} Documentation},
+  year         = {2024},
+  howpublished = {\url{https://www.sbert.net/}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{llamacpp2024,
+  author       = {{ggml-org}},
+  title        = {llama.cpp Documentation},
+  year         = {2024},
+  howpublished = {\url{https://github.com/ggml-org/llama.cpp}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{llamacpppython2024,
+  author       = {Abetlen},
+  title        = {llama-cpp-python Documentation},
+  year         = {2024},
+  howpublished = {\url{https://github.com/abetlen/llama-cpp-python}},
+  note         = {Accessed: 2026-03-09}
+}
+
+@misc{pytorch2024docs,
+  author       = {{PyTorch Team}},
+  title        = {{PyTorch} Documentation},
+  year         = {2024},
+  howpublished = {\url{https://pytorch.org/docs/}},
+  note         = {Accessed: 2026-03-09}
+}
--- a/report/report.tex
+++ b/report/report.tex
@ -2,7 +2,7 @@
 \usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage{lmodern}
-\usepackage[a4paper,margin=1in]{geometry}
+\usepackage[a4paper,margin=0.75in]{geometry}
 \usepackage{longtable}
 \usepackage{booktabs}
 \usepackage{array}
@ -13,6 +13,7 @@
 \usepackage[hidelinks]{hyperref}
 \usepackage{tabularx}
 \usepackage{xurl}
+\usepackage[numbers,sort&compress]{natbib}

 % Report-style paragraph spacing
 \setlength{\parindent}{0pt}
@ -61,9 +62,9 @@ User & j.thompson@example.com & password \\
 \end{tabular}
 \end{center}

-\textit{Note: I will try to keep the public website available, but the GPU node 
-runs on my home PC and may occasionally go offline. For reliable testing, 
-I recommend running the system locally on a machine with a CUDA-enabled GPU.}
+\textit{Note: The public site should always be available, but the GPU node 
+runs on my PC and can go offline. For reliable testing, 
+I recommend running my development compose stack on a machine with a CUDA-enabled GPU.}

 Manager registration code (for signup): \texttt{MANAGER2026}

@ -191,7 +192,7 @@ contextual reasoning, and adaptive response generation, making them
 well-suited for interactive, role-aware training scenarios. Unlike
 static documentation, LLM-driven systems can dynamically tailor
 explanations and guidance based on a user's specific role and prior
-knowledge.
+knowledge~\cite{meta2024llama3,langgraph2024}.

 Rather than relying on a monolithic chatbot, Dynavera employs a
 collection of specialized, collaborating agents. This modular approach
@ -254,13 +255,13 @@ enable scalable, context-aware onboarding:
  objectives that exceed the capability of a single monolithic model.
  Within Dynavera, this enables separation of instructional delivery,
  contextual reasoning, knowledge retrieval, and evaluation, improving
-  modularity, explainability, and system adaptability.
+  modularity, explainability, and system adaptability~\cite{langgraph2024}.
 \item
  Retrieval-Augmented Generation (RAG): Training responses are grounded
  in authoritative, organization-specific documentation rather than
  relying solely on a model's parametric knowledge. This ensures factual
  accuracy, contextual relevance, and rapid adaptability as
-  organizational knowledge evolves.
+  organizational knowledge evolves~\cite{pinecone2023rag}.
 \end{itemize}

 To address data privacy and deployment constraints, Dynavera prioritizes
@ -268,7 +269,7 @@ local inference using quantized open-weight models (e.g., Llama 3 in
 GGUF format). This design choice reduces dependency on external cloud
 APIs, supports offline or air-gapped environments, and aligns with
 enterprise privacy requirements while maintaining acceptable inference
-performance.
+performance~\cite{meta2024llama3,dettmers2023bitsandbytes,llamacpp2024}.

 \subsection{Positioning Against Alternative
 Approaches}\label{positioning-against-alternative-approaches}
@ -371,11 +372,13 @@ MCP Router & Python & Provides a standardized interface for agents to query data

 This stack was selected to balance modularity, rapid iteration, and production readiness. 
 A decoupled frontend-backend architecture lets the UI and API evolve independently, while PostgreSQL 
-with pgvector provides one ACID-compliant store for both relational state and vector retrieval.
+with pgvector provides one ACID-compliant store for both relational state and vector retrieval~%
+\cite{django2024docs,drf2024docs,pgvector2024}.

 To preserve performance and control, orchestration is implemented in native Python rather than heavier 
 framework abstractions such as LangChain. This keeps agent state handling explicit, reduces latency in the WebSocket loop,
-and supports local execution, data ownership, and architectural transparency during early-stage development.
+and supports local execution, data ownership, and architectural transparency during early-stage development~%
+\cite{langgraph2024,channels2024docs}.

 \subsection{Design Philosophy: The Distributed Agentic
 Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
@ -383,7 +386,7 @@ Pattern}\label{design-philosophy-the-distributed-agentic-pattern}
 Dynavera leverages the Model Context Protocol (MCP) to solve the
 "context gap" in corporate onboarding. Rather than providing the LLM
 with a static, bloated prompt, the system utilizes a Sidecar Tooling
-approach:
+approach~\cite{anthropic2024mcp,huggingface2024mcp}:

 \begin{itemize}
 \item
@ -437,13 +440,13 @@ while orchestration-time interaction uses Django Channels over
 WebSockets at /ws/onboarding/\textless session\_uuid\textgreater/. This
 allows the platform to handle both CRUD-style workflows and
 long-running, stateful agent interactions without forcing either pattern
-into the other.
+into the other~\cite{drf2024docs,channels2024docs}.

 For ingestion, the backend follows an asynchronous execution path:
 uploaded files are stored as TrainingFile records, and a post-save
 trigger enqueues background processing through Celery (Redis broker).
 This prevents heavy preprocessing from blocking request-response latency
-on the main web process.
+on the main web process~\cite{celery2024docs,redis2024docs}.

 Persistence is model-driven and traceable. Session state, progress,
 generated onboarding structures, and interaction events are stored in
@ -480,14 +483,14 @@ batches long content, and calls the GPU service at /v1/semantic-chunk.
 The service performs sentence-level semantic breakpoint detection using
 embedding-distance thresholds, then returns coherent chunks with
 embeddings. This avoids naive fixed-size splits that can break context
-mid-concept.
+mid-concept~\cite{sbert2024docs,fastapi2024docs}.

 \underline{Vector storage and retrieval with pgvector}\\
 Returned chunk embeddings are stored in RoleRagDocument.embedding (768
 dimensions) in PostgreSQL using pgvector, linked relationally to role
 and source file metadata. Retrieval is performed in SQL using
 cosine-distance ranking and top-k selection, allowing role filtering and
-similarity search in one query path.
+similarity search in one query path~\cite{pgvector2024}.

 \subsubsection{Agent Orchestration Workflow
 (Simplified)}\label{agent-orchestration-workflow-simplified}
@ -645,95 +648,9 @@ practical manner. While this project serves as a proof-of-concept, the
 modular nature of the specialist agents provides a clear path for future
 expansion into more nuanced, multi-modal onboarding scenarios.

-\begin{figure*}[b]
-\centering
-\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/home-page.png}
-\caption{Home page of Dynavera.}
-\end{figure*}
-
-\begin{figure*}[b]
-\centering
-\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/organization-page.png}
-\caption{Organization management view.}
-\end{figure*}
-
-\begin{figure*}[b]
-\centering
-\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-loading-page.png}
-\caption{Onboarding generation/loading state.}
-\end{figure*}
-
-\begin{figure*}[b]
-\centering
-\includegraphics[width=\textwidth,height=3.2in,keepaspectratio]{diagrams/onboarding-content-page.png}
-\caption{Onboarding content delivery view.}
-\end{figure*}
-
 \section{References}\label{references}
-
-\begin{itemize}
-\item
-  Anthropic (2024). Model Context Protocol (MCP) Specification.
-  Available at: \url{https://modelcontextprotocol.io} (Accessed: 9 March
-  2026).
-\item
-  Hugging Face (2024). Introduction to Model Context Protocol (MCP).
-  Available at:
-  \url{https://huggingface.co/learn/mcp-course/en/unit1/key-concepts}
-  (Accessed: 9 March 2026).
-\item
-  LangChain (2024). LangGraph: Building Stateful, Multi-agent
-  Applications with LLMs. Available at:
-  \url{https://docs.langchain.com/oss/python/langgraph/workflows-agents}
-  (Accessed: 9 March 2026).
-\item
-  Meta AI (2024). Llama 3: Open-weight Large Language Models. Available
-  at: \url{https://llama.meta.com/llama3/} (Accessed: 9 March 2026).
-\item
-  PostgreSQL Global Development Group (2024). pgvector: Open-source
-  vector similarity search for PostgreSQL. Available at:
-  \url{https://github.com/pgvector/pgvector} (Accessed: 9 March 2026).
-\item
-  Pinecone (2023). Retrieval Augmented Generation (RAG) and Semantic
-  Search. Available at:
-  \url{https://www.pinecone.io/learn/retrieval-augmented-generation/}
-  (Accessed: 9 March 2026).
-\item
-  Dettmers, T. (2023). 4-bit Quantization and Bitsandbytes for LLMs.
-  Available at:
-  \url{https://huggingface.co/blog/4bit-transformers-bitsandbytes} (Accessed:
-  9 March 2026).
-\item
-  vLLM Team (2024). High-Throughput Serving with PagedAttention.
-  Available at: \url{https://vllm.ai} (Accessed: 9 March 2026).
-\item
-  Django Software Foundation (2024). Django Channels: Real-time
-  WebSockets for Python. Available at:
-  \url{https://channels.readthedocs.io/en/stable/} (Accessed: 9 March 2026).
-\item
-  Django Software Foundation (2024). Django Documentation.
-  Available at: \url{https://docs.djangoproject.com/} (Accessed: 9 March 2026).
-\item
-  Encode OSS (2024). Django REST framework Documentation.
-  Available at: \url{https://www.django-rest-framework.org/} (Accessed: 9 March 2026).
-\item
-  Celery Project (2024). Celery Documentation. Available at: \url{https://docs.celeryq.dev/} (Accessed: 9 March 2026).
-\item
-  Redis Ltd. (2024). Redis Documentation. Available at: \url{https://redis.io/docs/} (Accessed: 9 March 2026).
-\item
-  FastAPI (2024). FastAPI Documentation. Available at: \url{https://fastapi.tiangolo.com/} (Accessed: 9 March 2026).
-\item
-  UKPLab / SBERT (2024). Sentence-Transformers Documentation.
-  Available at: \url{https://www.sbert.net/} (Accessed: 9 March 2026).
-\item
-  Abetlen (2024). llama-cpp-python Documentation.
-  Available at: \url{https://github.com/abetlen/llama-cpp-python} (Accessed: 9 March 2026).
-\item
-  ggml-org (2024). llama.cpp Documentation.
-  Available at: \url{https://github.com/ggml-org/llama.cpp} (Accessed: 9 March 2026).
-\item
-  PyTorch Team (2024). PyTorch Documentation. Available at: \url{https://pytorch.org/docs/} (Accessed: 9 March 2026).
-\end{itemize}
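+\bibliographystyle{unsrtnat}\renewcommand{\bibsection}{}% numbered \section{References} above is the heading; suppress natbib's own
+\bibliography{references}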

 \end{document}