diff --git a/notebooks/external-model-testing.ipynb b/notebooks/external-model-testing.ipynb
new file mode 100644
index 0000000..d354333
--- /dev/null
+++ b/notebooks/external-model-testing.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0910db83",
+   "metadata": {},
+   "source": [
+    "# Model Testing with GPT4All running locally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "47cacfc9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports\n",
+    "import json\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "484cfebc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Request settings for the locally served chat-completions API\n",
+    "MODEL = \"DeepSeek-R1-Distill-Qwen-7B\"\n",
+    "TEMPERATURE = 0.28\n",
+    "MAX_TOKENS = 2000\n",
+    "HEADERS = {\"Content-Type\": \"application/json\"}\n",
+    "API_URL = \"http://localhost:4891/v1/chat/completions\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "90b9b1f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "content = \"Teach me computer vision\"  # single user prompt sent to the model\n",
+    "data = dict(model=MODEL, messages=[dict(role=\"user\", content=content)], max_tokens=MAX_TOKENS, temperature=TEMPERATURE)\n",
+    "response = requests.post(url=API_URL, headers=HEADERS, json=data)  # payload is serialised as the JSON request body"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "88a77498",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'{\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"<think>\\\\n\\\\n</think>\\\\n\\\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\\\n\\\\nHere’s an introduction to get you started:\\\\n\\\\n---\\\\n\\\\n### **1. What is Computer Vision?**\\\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\\\n- Object detection\\\\n- Image classification\\\\n- Face recognition\\\\n- Medical image analysis\\\\n- Autonomous vehicle navigation\\\\n\\\\n---\\\\n\\\\n### **2. Key Concepts in Computer Vision**\\\\n#### **Image Representation**\\\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\\\n\\\\n#### **Common Tasks**\\\\n1. **Object Detection**:\\\\n   - Identify the presence and location of objects in an image.\\\\n   - Example: Bounding box regression.\\\\n\\\\n2. **Classification**:\\\\n   - Categorize images into predefined classes (e.g., cat vs. dog).\\\\n\\\\n3. **Segmentation**:\\\\n   - Partition an image into segments, each representing a different object or region.\\\\n\\\\n4. **Feature Extraction**:\\\\n   - Identify and extract relevant patterns from images for further analysis.\\\\n\\\\n---\\\\n\\\\n### **3. 
Tools and Libraries**\\\\nTo get started with computer vision, you’ll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\\\n\\\\n#### **OpenCV**\\\\n- A popular open-source library for image processing.\\\\n- Features:\\\\n  - Image filtering\\\\n  - Edge detection\\\\n  - Object tracking\\\\n  - Face recognition\\\\n\\\\n#### **TensorFlow/Keras**\\\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\\\n- Easy to use and widely adopted.\\\\n\\\\n---\\\\n\\\\n### **4. Getting Started with Computer Vision**\\\\n\\\\n#### **Step 1: Learn the Basics**\\\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\\\n\\\\n#### **Step 2: Explore Datasets**\\\\nWork with common datasets:\\\\n- CIFAR-10/100\\\\n- MNIST (handwritten digits)\\\\n- COCO (common objects in context)\\\\n\\\\n#### **Step 3: Build Simple Models**\\\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\\\n\\\\n#### **Step 4: Experiment with Deep Learning**\\\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\\\n\\\\n---\\\\n\\\\n### **5. Resources for Learning**\\\\n- **Books**:\\\\n  - *Deep Learning for Computer Vision* by Adrian Rosebrock\\\\n  - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\\\n\\\\n- **Tutorials/Documentation**:\\\\n  - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\\\n  - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\\\n\\\\n- **Online Courses**:\\\\n  - Coursera: \\\\\"Introduction to Computer Vision\\\\\" by Georgia Tech\\\\n  - Udacity: \\\\\"Deep Learning for Computer Vision\\\\\"\\\\n  - Fast.ai: Free, practical courses on computer vision.\\\\n\\\\n---\\\\n\\\\n### **6. Practice Projects**\\\\n1. 
**Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\\\n2. **Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\\\n\\\\n---\\\\n\\\\n### **7. Keep Learning**\\\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\\\n- Join communities like Reddit’s r/computervision or Stack Overflow.\\\\n- Experiment with cutting-edge models and techniques in computer vision.\\\\n\\\\n---\\\\n\\\\nWith practice and persistence, you’ll become proficient in computer vision. Start small, experiment, and most importantly, have fun!\",\"role\":\"assistant\"},\"references\":null}],\"created\":1768678056,\"id\":\"placeholder\",\"model\":\"DeepSeek-R1-Distill-Qwen-7B\",\"object\":\"chat.completion\",\"usage\":{\"completion_tokens\":861,\"prompt_tokens\":8,\"total_tokens\":869}}'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response.text  # raw response body as a string, before any JSON parsing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "c416905c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'choices': [{'finish_reason': 'stop',\n",
+       "   'index': 0,\n",
+       "   'logprobs': None,\n",
+       "   'message': {'content': '<think>\\n\\n</think>\\n\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\n\\nHere’s an introduction to get you started:\\n\\n---\\n\\n### **1. What is Computer Vision?**\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\n- Object detection\\n- Image classification\\n- Face recognition\\n- Medical image analysis\\n- Autonomous vehicle navigation\\n\\n---\\n\\n### **2. Key Concepts in Computer Vision**\\n#### **Image Representation**\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\n\\n#### **Common Tasks**\\n1. **Object Detection**:\\n   - Identify the presence and location of objects in an image.\\n   - Example: Bounding box regression.\\n\\n2. **Classification**:\\n   - Categorize images into predefined classes (e.g., cat vs. dog).\\n\\n3. **Segmentation**:\\n   - Partition an image into segments, each representing a different object or region.\\n\\n4. **Feature Extraction**:\\n   - Identify and extract relevant patterns from images for further analysis.\\n\\n---\\n\\n### **3. 
Tools and Libraries**\\nTo get started with computer vision, you’ll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\n\\n#### **OpenCV**\\n- A popular open-source library for image processing.\\n- Features:\\n  - Image filtering\\n  - Edge detection\\n  - Object tracking\\n  - Face recognition\\n\\n#### **TensorFlow/Keras**\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\n- Easy to use and widely adopted.\\n\\n---\\n\\n### **4. Getting Started with Computer Vision**\\n\\n#### **Step 1: Learn the Basics**\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\n\\n#### **Step 2: Explore Datasets**\\nWork with common datasets:\\n- CIFAR-10/100\\n- MNIST (handwritten digits)\\n- COCO (common objects in context)\\n\\n#### **Step 3: Build Simple Models**\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\n\\n#### **Step 4: Experiment with Deep Learning**\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\n\\n---\\n\\n### **5. Resources for Learning**\\n- **Books**:\\n  - *Deep Learning for Computer Vision* by Adrian Rosebrock\\n  - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\n\\n- **Tutorials/Documentation**:\\n  - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\n  - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\n\\n- **Online Courses**:\\n  - Coursera: \"Introduction to Computer Vision\" by Georgia Tech\\n  - Udacity: \"Deep Learning for Computer Vision\"\\n  - Fast.ai: Free, practical courses on computer vision.\\n\\n---\\n\\n### **6. Practice Projects**\\n1. **Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\n2. 
**Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\n\\n---\\n\\n### **7. Keep Learning**\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\n- Join communities like Reddit’s r/computervision or Stack Overflow.\\n- Experiment with cutting-edge models and techniques in computer vision.\\n\\n---\\n\\nWith practice and persistence, you’ll become proficient in computer vision. Start small, experiment, and most importantly, have fun!',\n",
+       "    'role': 'assistant'},\n",
+       "   'references': None}],\n",
+       " 'created': 1768678056,\n",
+       " 'id': 'placeholder',\n",
+       " 'model': 'DeepSeek-R1-Distill-Qwen-7B',\n",
+       " 'object': 'chat.completion',\n",
+       " 'usage': {'completion_tokens': 861, 'prompt_tokens': 8, 'total_tokens': 869}}"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response_data = response.json()  # let requests decode the JSON body (replaces json.loads(response.text))\n",
+    "response_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "2553d924",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<think>\\n\\n</think>\\n\\nComputer vision is a field of artificial intelligence that focuses on enabling computers to interpret and understand visual information from the world. It involves training algorithms, typically using deep learning techniques, to perform tasks such as object recognition, image segmentation, feature extraction, and more.\\n\\nHere’s an introduction to get you started:\\n\\n---\\n\\n### **1. What is Computer Vision?**\\nComputer vision mimics human visual perception by analyzing images or video data to extract meaningful information. It relies heavily on machine learning and deep learning techniques like convolutional neural networks (CNNs) to perform tasks such as:\\n- Object detection\\n- Image classification\\n- Face recognition\\n- Medical image analysis\\n- Autonomous vehicle navigation\\n\\n---\\n\\n### **2. Key Concepts in Computer Vision**\\n#### **Image Representation**\\n- **Pixels**: The basic unit of an image, represented by numerical values indicating color and brightness.\\n- **Channels**: Color images have multiple channels (e.g., RGB has red, green, blue channels).\\n\\n#### **Common Tasks**\\n1. **Object Detection**:\\n   - Identify the presence and location of objects in an image.\\n   - Example: Bounding box regression.\\n\\n2. **Classification**:\\n   - Categorize images into predefined classes (e.g., cat vs. dog).\\n\\n3. **Segmentation**:\\n   - Partition an image into segments, each representing a different object or region.\\n\\n4. **Feature Extraction**:\\n   - Identify and extract relevant patterns from images for further analysis.\\n\\n---\\n\\n### **3. 
Tools and Libraries**\\nTo get started with computer vision, you’ll need tools like OpenCV (Open Source Computer Vision) or TensorFlow/Keras for building models.\\n\\n#### **OpenCV**\\n- A popular open-source library for image processing.\\n- Features:\\n  - Image filtering\\n  - Edge detection\\n  - Object tracking\\n  - Face recognition\\n\\n#### **TensorFlow/Keras**\\n- Frameworks built on top of TensorFlow, ideal for deep learning tasks.\\n- Easy to use and widely adopted.\\n\\n---\\n\\n### **4. Getting Started with Computer Vision**\\n\\n#### **Step 1: Learn the Basics**\\nStart by understanding fundamental concepts like pixels, image processing techniques, and basic computer vision algorithms (e.g., SIFT, HOG).\\n\\n#### **Step 2: Explore Datasets**\\nWork with common datasets:\\n- CIFAR-10/100\\n- MNIST (handwritten digits)\\n- COCO (common objects in context)\\n\\n#### **Step 3: Build Simple Models**\\nUse pre-trained models like ResNet or VGG to classify images. For example, you can train a model to recognize cats vs. dogs.\\n\\n#### **Step 4: Experiment with Deep Learning**\\nTune hyperparameters (learning rate, batch size) and explore techniques like data augmentation to improve model performance.\\n\\n---\\n\\n### **5. Resources for Learning**\\n- **Books**:\\n  - *Deep Learning for Computer Vision* by Adrian Rosebrock\\n  - *Computer Vision: Algorithms and Applications* by Richard Szeliski\\n\\n- **Tutorials/Documentation**:\\n  - OpenCV官网文档 [https://docs.opencv.org](https://docs.opencv.org)\\n  - TensorFlow/Keras官网文档 [https://www.tensorflow.org](https://www.tensorflow.org)\\n\\n- **Online Courses**:\\n  - Coursera: \"Introduction to Computer Vision\" by Georgia Tech\\n  - Udacity: \"Deep Learning for Computer Vision\"\\n  - Fast.ai: Free, practical courses on computer vision.\\n\\n---\\n\\n### **6. Practice Projects**\\n1. **Object Detection**: Use YOLO or Mask R-CNN to detect objects in images.\\n2. 
**Image Classification**: Build a model that classifies images into predefined categories (e.g., flowers vs. vegetables).\\n3. **Face Recognition**: Implement face recognition using deep learning frameworks.\\n\\n---\\n\\n### **7. Keep Learning**\\n- Follow research papers on arXiv ([https://arxiv.org](https://arxiv.org)).\\n- Join communities like Reddit’s r/computervision or Stack Overflow.\\n- Experiment with cutting-edge models and techniques in computer vision.\\n\\n---\\n\\nWith practice and persistence, you’ll become proficient in computer vision. Start small, experiment, and most importantly, have fun!'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response_data['choices'][0]['message']['content']  # message text of the first entry in 'choices'"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/fine-tune-local-model.ipynb b/notebooks/fine-tune-local-model.ipynb
new file mode 100644
index 0000000..ff96a93
--- /dev/null
+++ b/notebooks/fine-tune-local-model.ipynb
@@ -0,0 +1,1210 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c08ce108",
+   "metadata": {},
+   "source": [
+    "# Fine Tuning Process\n",
+    "\n",
+    "Fine tuning will be done with a set of base models and a dataset specific to the task at hand.\n",
+    "\n",
+    "The process should follow the core steps below:\n",
+    "1. **Data Processing**: Clean and preprocess the dataset to ensure it is in the correct format for training, using the base model itself to provide the file.\n",
+    "2. **Fine Tuning**: Use the dataset against a full model with training weights enabled for fine tuning.\n",
+    "3. **Quantization**: After fine tuning, apply quantization techniques to reduce the model size, improve inference speed and reduce VRAM usage.\n",
+    "4. **Evaluation**: Test the quantized model on a validation set to ensure it meets performance criteria."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "f782711b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# Precision-related switches, exported before torch/transformers are imported in the next cell.\n",
+    "# NOTE(review): confirm the installed torch/accelerate versions actually read these variable names.\n",
+    "os.environ.update({\"CUDA_DISABLE_BF16\": \"1\", \"TORCH_CUDA_ALLOW_BF16_REDUCED_PRECISION_REDUCTION\": \"0\", \"ACCELERATE_DISABLE_FP16\": \"1\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7d6fe75f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "from docx import Document\n",
+    "import json\n",
+    "import os\n",
+    "import re\n",
+    "from gpt4all import GPT4All\n",
+    "import subprocess\n",
+    "from peft import PeftModel, LoraConfig\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
+    "import torch\n",
+    "from datasets import load_dataset\n",
+    "from trl import SFTTrainer\n",
+    "import uuid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e6b32a63",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Per-run workspace: every execution gets its own uuid-named folder under ./build.\n",
+    "# os.path.join keeps the platform's separator instead of mixing in a hard-coded \"/\".\n",
+    "BUILD_DIR = os.path.join(os.path.abspath(''), \"build\")\n",
+    "FRESH_DIR = os.path.join(BUILD_DIR, str(uuid.uuid4()))\n",
+    "MODEL_DIR = os.path.join(FRESH_DIR, \"models\")\n",
+    "DATA_DIR = os.path.join(FRESH_DIR, \"data\")\n",
+    "MERGE_DIR = os.path.join(FRESH_DIR, \"merged\")\n",
+    "CHUNK_DIR = os.path.join(FRESH_DIR, \"chunks\")\n",
+    "\n",
+    "# makedirs also creates the missing parents (BUILD_DIR and FRESH_DIR).\n",
+    "for run_dir in (MODEL_DIR, DATA_DIR, MERGE_DIR, CHUNK_DIR):\n",
+    "    os.makedirs(run_dir, exist_ok=True)\n",
+    "FRESH_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "64b1a7cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "BUILD_LLAMA_DIR = os.path.join(BUILD_DIR, \"llama-b7658-bin-win-cuda-12.4-x64\")  # NOTE(review): presumably the unpacked prebuilt llama.cpp release — confirm\n",
+    "REPO_LLAMA_DIR = os.path.join(BUILD_DIR, \"llama.cpp\")  # NOTE(review): presumably a llama.cpp source checkout — confirm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ff1e55da",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total entries extracted: 84\n",
+      "First entry:\n",
+      "term: 3D-Digitizer\n",
+      "Definition: A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world ob...\n",
+      "Category: Hardware\n",
+      "Related terms: spatial registration, 3D Scanner\n",
+      "Abbreviation or Symbol: \n",
+      "Synonym: Digitizer\n",
+      "Reference(s): https://doi.org/10.1016/j.neuroimage.2005.05.019, https://doi.org/10.1109/EMBC.2013.6611270 https://...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parse the glossary .docx into a list of dicts, one per term.\n",
+    "DOCS_PATH = \"./build/documents/fNIRS_Glossary_Hardware.docx\"\n",
+    "doc = Document(DOCS_PATH)\n",
+    "lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]\n",
+    "\n",
+    "# Start after the \"A - Z\" heading; if it is missing, start_idx stays 0 and every line is parsed.\n",
+    "start_idx = 0\n",
+    "for i, line in enumerate(lines):\n",
+    "    if \"fNIRS Glossary of Hardware Terms: A - Z\" in line:\n",
+    "        start_idx = i + 1\n",
+    "        break\n",
+    "glossary_lines = lines[start_idx:]\n",
+    "\n",
+    "# Labels that open a field inside an entry; matched as line prefixes.\n",
+    "keys = [\n",
+    "    \"Definition:\", \"Category:\", \"Related terms:\", \"Abbreviation or Symbol:\",\n",
+    "    \"Synonym:\", \"Reference(s):\", \"Alternative definition:\",\n",
+    "    \"Related terms to alternative:\", \"Reference(s) for alternative:\",\n",
+    "    \"Originally drafted by:\", \"Reviewed (or Edited) by:\", \"Status:\"\n",
+    "]\n",
+    "\n",
+    "entries = []\n",
+    "current_entry = {}\n",
+    "# Field that unlabelled continuation lines are appended to. Initialised here so that a\n",
+    "# continuation line seen before any heading or label is skipped instead of raising NameError.\n",
+    "last_key = None\n",
+    "\n",
+    "for line in glossary_lines:\n",
+    "    # Heuristic: a heading ends in \"Definition:\" or is a colon-free line of fewer than 10 words.\n",
+    "    if line.endswith(\"Definition:\") or (\":\" not in line and len(line.split()) < 10):\n",
+    "        if current_entry:\n",
+    "            entries.append(current_entry)\n",
+    "        current_entry = {\"term\": line, \"Definition\": \"\"}\n",
+    "        last_key = \"Definition\"\n",
+    "        continue\n",
+    "\n",
+    "    matched_key = next((key for key in keys if line.startswith(key)), None)\n",
+    "    if matched_key:\n",
+    "        last_key = matched_key.rstrip(\":\")\n",
+    "        current_entry[last_key] = line[len(matched_key):].strip()\n",
+    "    elif last_key:\n",
+    "        current_entry[last_key] += \" \" + line\n",
+    "\n",
+    "if current_entry:\n",
+    "    entries.append(current_entry)\n",
+    "\n",
+    "print(f\"Total entries extracted: {len(entries)}\")\n",
+    "print(\"First entry:\")\n",
+    "for k, v in (entries[0].items() if entries else ()):  # nothing to preview when no entry was parsed\n",
+    "    print(f\"{k}: {v[:100]}{'...' if len(v) > 100 else ''}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c647e81d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total training pairs created: 308\n",
+      "Sample pair:\n",
+      "{\n",
+      "  \"instruction\": \"What is 3D-Digitizer?\",\n",
+      "  \"input\": \"\",\n",
+      "  \"output\": \"A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world object and converts this information into a set of 3D points in a coordinate system. It is typically used to record the position of fNIRS optodes on the participant’s head together with reference points or anatomical landmarks. Later this information can be used to obtain MNI coordinates of the optodes or channels by using spatial registration tools. In contrast to non-contacting 3D Scanners, which utilize technologies such as lasers, sound, or magnetism to scan an entire object or area, 3D digitizers are close-distance systems that use a stylus or articulated arm to mark points on an actual object based on an electromagnetic field.\"\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "def qa_pair(instruction, output):\n",
+    "    \"\"\"Build one training record in the instruction/input/output layout.\n",
+    "\n",
+    "    Args:\n",
+    "        instruction: Question text presented to the model.\n",
+    "        output: Answer text the model should learn to produce.\n",
+    "\n",
+    "    Returns:\n",
+    "        A dict with \"instruction\", \"input\" (always an empty string) and \"output\".\n",
+    "    \"\"\"\n",
+    "    return {\"instruction\": instruction, \"input\": \"\", \"output\": output}\n",
+    "\n",
+    "\n",
+    "# Glossary field -> question templates asked about it.\n",
+    "# Tuple order is the order in which pairs are emitted for each entry.\n",
+    "FIELD_QUESTIONS = (\n",
+    "    (\"Definition\", (\"What is {term_name}?\", \"Explain {term_name}.\")),\n",
+    "    (\"Category\", (\"What category does {term_name} belong to?\",)),\n",
+    "    (\"Related terms\", (\"What are related terms for {term_name}?\",)),\n",
+    "    (\"Abbreviation or Symbol\", (\"What is the abbreviation or symbol for {term_name}?\",)),\n",
+    "    (\"Reference(s)\", (\"Provide references for {term_name}.\",)),\n",
+    ")\n",
+    "\n",
+    "# Invented terms with invented definitions, appended after the real glossary pairs.\n",
+    "FAKE_TERMS = {\n",
+    "    \"Quantum Banana Index\": \"A fictional neuro-optical coefficient representing potassium phase inversion in cognitive bananas.\",\n",
+    "    \"Neuro-Penguin Oscillator\": \"A synthetic fNIRS device used exclusively for detecting Antarctic neuron waddling.\"\n",
+    "}\n",
+    "\n",
+    "training_data = []\n",
+    "\n",
+    "# Real glossary entries: one pair per (non-empty field, question template).\n",
+    "for entry in entries:\n",
+    "    term_name = entry.get(\"term\", \"Unknown Term\")\n",
+    "    for field, questions in FIELD_QUESTIONS:\n",
+    "        answer = entry.get(field)\n",
+    "        if not answer:\n",
+    "            # Absent or empty field: nothing to ask about.\n",
+    "            continue\n",
+    "        training_data.extend(\n",
+    "            qa_pair(question.format(term_name=term_name), answer)\n",
+    "            for question in questions\n",
+    "        )\n",
+    "\n",
+    "# Fictional terms: the same two definition-style questions as above.\n",
+    "for term, definition in FAKE_TERMS.items():\n",
+    "    training_data.extend(\n",
+    "        qa_pair(question, definition)\n",
+    "        for question in (f\"What is {term}?\", f\"Explain {term}.\")\n",
+    "    )\n",
+    "\n",
+    "# Write the pairs as JSON Lines: one object per line, non-ASCII characters left unescaped.\n",
+    "os.makedirs(DATA_DIR, exist_ok=True)\n",
+    "jsonl_path = os.path.join(DATA_DIR, \"training_data.jsonl\")\n",
+    "with open(jsonl_path, \"w\", encoding=\"utf-8\") as f:\n",
+    "    for row in training_data:\n",
+    "        f.write(json.dumps(row, ensure_ascii=False) + \"\\n\")\n",
+    "\n",
+    "# Report the size of the set and show the first pair as a sanity check.\n",
+    "pair_count = len(training_data)\n",
+    "first_pair = json.dumps(training_data[0], indent=2, ensure_ascii=False)\n",
+    "print(f\"Total training pairs created: {pair_count}\")\n",
+    "print(f\"Sample pair:\\n{first_pair}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "f350d0b6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b37f948b60c64ef5ae4da6ac7056783d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using GPU: NVIDIA GeForce RTX 3060\n"
+     ]
+    }
+   ],
+   "source": [
+    "model_id = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
+    "\n",
+    "# Fail fast: verify CUDA before the checkpoint shards are loaded, not afterwards.\n",
+    "if not torch.cuda.is_available():\n",
+    "    raise RuntimeError(\n",
+    "        \"CUDA is not available. Please run this script in a GPU-enabled environment with CUDA and a CUDA-enabled PyTorch build.\"\n",
+    "    )\n",
+    "print(\"Using GPU:\", torch.cuda.get_device_name(0))\n",
+    "\n",
+    "# 4-bit weight loading with float16 compute, configured through BitsAndBytesConfig.\n",
+    "quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_id,\n",
+    "    quantization_config=quant_config,\n",
+    "    device_map=\"auto\",\n",
+    "    dtype=torch.float16,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "13774552",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "42ccf45ae0624e1abff68e5d4421c3e9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train split: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "92d4345f206c45fa8318845d18aa2ed3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/308 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\training_args.py:2111: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "785c2527e0b1447683027d2ef98ebb8f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Adding EOS to train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bbbbff4570734f1bb9663995567298e0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Tokenizing train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2171453db3a248d7a5e3a73e7eb25498",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Truncating train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Tokenizer of the base model, with EOS reused as the padding token.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
+    "tokenizer.pad_token = tokenizer.eos_token\n",
+    "\n",
+    "dataset = load_dataset(\"json\", data_files=os.path.join(DATA_DIR, \"training_data.jsonl\"))\n",
+    "first_split = list(dataset.keys())[0]\n",
+    "cols = dataset[first_split].column_names\n",
+    "if \"text\" not in cols and {\"instruction\", \"output\"} <= set(cols):\n",
+    "    # Instruction data: the training text must hold question AND answer; \"instruction\" alone would teach only the prompts.\n",
+    "    dataset = dataset.map(lambda ex: {\"text\": \"\\n\".join(part for part in (ex[\"instruction\"], ex.get(\"input\"), ex[\"output\"]) if part)})\n",
+    "elif \"text\" not in cols:\n",
+    "    # Any other schema: use the first recognised free-text column as-is.\n",
+    "    candidates = [\"text\", \"prompt\", \"instruction\", \"input\", \"content\", \"context\", \"message\", \"dialog\", \"conversation\"]\n",
+    "    found = next((c for c in candidates if c in cols), None)\n",
+    "    if found is None:\n",
+    "        raise ValueError(f\"No suitable text field found in training data. Columns: {cols}\")\n",
+    "    dataset = dataset.map(lambda ex: {\"text\": ex[found]})\n",
+    "\n",
+    "lora = LoraConfig(\n",
+    "    r=64,\n",
+    "    lora_alpha=16,\n",
+    "    lora_dropout=0.05,\n",
+    "    target_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"o_proj\"],\n",
+    "    task_type=\"CAUSAL_LM\"\n",
+    ")\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model=model,\n",
+    "    train_dataset=dataset[\"train\"],\n",
+    "    peft_config=lora,\n",
+    "    args=TrainingArguments(\n",
+    "        output_dir=CHUNK_DIR,\n",
+    "        num_train_epochs=3,\n",
+    "        per_device_train_batch_size=6,\n",
+    "        gradient_accumulation_steps=3,\n",
+    "        fp16=False,\n",
+    "        bf16=False,\n",
+    "        optim=\"paged_adamw_8bit\",\n",
+    "        max_grad_norm=0.0,  # NOTE(review): 0.0 presumably switches gradient clipping off — confirm intended\n",
+    "        logging_steps=20,\n",
+    "        save_strategy=\"epoch\"\n",
+    "    )\n",
+    ")\n",
+    "trainer.accelerator.scaler = None  # NOTE(review): drops accelerate's grad scaler, presumably to keep loss scaling off — confirm\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "119ae7e6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='54' max='54' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [54/54 00:57, Epoch 3/3]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>20</td>\n",
+       "      <td>5.264400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>40</td>\n",
+       "      <td>3.700100</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "trainer.train()\n",
+    "# Keep the LoRA adapter in the training output directory. MERGE_DIR is later\n",
+    "# filled with the merged full model, and a leftover adapter_config.json there\n",
+    "# would make the folder look like a PEFT adapter instead of a standalone model.\n",
+    "trainer.save_model(CHUNK_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5cf0fab5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading base model...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f672a6676faa4571b3c03c7d3bf8ad98",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some parameters are on the meta device because they were offloaded to the cpu.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading LoRA adapters from: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/chunks\\checkpoint-54\n",
+      "Merging adapters...\n",
+      "Saving merged model to: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\modeling_utils.py:3970: UserWarning: Attempting to save a model with offloaded modules. Ensure that unallocated cpu memory exceeds the `shard_size` (5GB default)\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "aebc97d10a664fd399bcd2e22b09666c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "('c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer_config.json',\n",
+       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\special_tokens_map.json',\n",
+       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\chat_template.jinja',\n",
+       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer.json')"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Model identifier handed to from_pretrained for the base weights and the tokenizer.\n",
+    "BASE_MODEL = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
+    "\n",
+    "def find_latest_checkpoint(root_dir):\n",
+    "    \"\"\"Locate the most recent LoRA adapter directory under ``root_dir``.\n",
+    "\n",
+    "    A directory counts as an adapter when it contains ``adapter_config.json``.\n",
+    "    If ``root_dir`` itself is an adapter it is returned unchanged. Otherwise its\n",
+    "    immediate subdirectories are searched: names ending in ``-<integer>``\n",
+    "    (e.g. ``checkpoint-54``) are ranked by that integer, and any other adapter\n",
+    "    directory is ranked by modification time, always below a numbered one.\n",
+    "\n",
+    "    Args:\n",
+    "        root_dir: Path of the training output directory to search.\n",
+    "\n",
+    "    Returns:\n",
+    "        Path of the selected adapter directory, or ``None`` when ``root_dir``\n",
+    "        is not a directory or holds no adapter.\n",
+    "    \"\"\"\n",
+    "    if os.path.exists(os.path.join(root_dir, \"adapter_config.json\")):\n",
+    "        return root_dir\n",
+    "\n",
+    "    if not os.path.isdir(root_dir):\n",
+    "        return None\n",
+    "\n",
+    "    candidates = []\n",
+    "    for name in os.listdir(root_dir):\n",
+    "        path = os.path.join(root_dir, name)\n",
+    "        if os.path.isdir(path) and os.path.exists(os.path.join(path, \"adapter_config.json\")):\n",
+    "            candidates.append(path)\n",
+    "\n",
+    "    if not candidates:\n",
+    "        return None\n",
+    "\n",
+    "    def key(p):\n",
+    "        # Tuple keys stop step numbers and mtimes from being compared with each\n",
+    "        # other; a raw epoch timestamp would otherwise outrank every step count.\n",
+    "        suffix = os.path.basename(p).rsplit(\"-\", 1)[-1]\n",
+    "        try:\n",
+    "            return (1, int(suffix))\n",
+    "        except ValueError:\n",
+    "            return (0, os.path.getmtime(p))\n",
+    "\n",
+    "    return max(candidates, key=key)\n",
+    "\n",
+    "adapter_file = find_latest_checkpoint(CHUNK_DIR)\n",
+    "if adapter_file is None:\n",
+    "    # Fail here with a clear message instead of inside PeftModel.from_pretrained.\n",
+    "    raise FileNotFoundError(f\"No LoRA adapter (adapter_config.json) found under: {CHUNK_DIR}\")\n",
+    "\n",
+    "print(\"Loading base model...\")\n",
+    "base = AutoModelForCausalLM.from_pretrained(\n",
+    "    BASE_MODEL,\n",
+    "    dtype=torch.float16,\n",
+    "    device_map=\"auto\",\n",
+    ")\n",
+    "\n",
+    "print(\"Loading LoRA adapters from:\", adapter_file)\n",
+    "model = PeftModel.from_pretrained(base, adapter_file)\n",
+    "\n",
+    "print(\"Merging adapters...\")\n",
+    "model = model.merge_and_unload()\n",
+    "\n",
+    "# Write the merged weights and the base model's tokenizer files side by side.\n",
+    "print(\"Saving merged model to:\", MERGE_DIR)\n",
+    "model.save_pretrained(MERGE_DIR)\n",
+    "AutoTokenizer.from_pretrained(BASE_MODEL).save_pretrained(MERGE_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "7e52847f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:hf-to-gguf:Loading model: merged\n",
+      "INFO:hf-to-gguf:Model architecture: LlamaForCausalLM\n",
+      "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n",
+      "INFO:hf-to-gguf:gguf: indexing model part 'model-00001-of-00004.safetensors'\n",
+      "INFO:hf-to-gguf:gguf: indexing model part 'model-00002-of-00004.safetensors'\n",
+      "INFO:hf-to-gguf:gguf: indexing model part 'model-00003-of-00004.safetensors'\n",
+      "INFO:hf-to-gguf:gguf: indexing model part 'model-00004-of-00004.safetensors'\n",
+      "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+      "INFO:hf-to-gguf:Exporting model...\n",
+      "INFO:hf-to-gguf:token_embd.weight,           torch.float16 --> F16, shape = {4096, 128256}\n",
+      "INFO:hf-to-gguf:blk.0.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.0.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.0.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.0.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.0.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.0.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.0.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.0.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.0.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.1.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.1.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.1.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.1.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.1.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.1.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.1.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.1.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.1.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.2.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.2.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.2.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.2.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.2.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.2.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.2.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.2.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.2.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.3.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.3.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.3.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.3.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.3.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.3.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.3.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.3.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.3.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.4.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.4.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.4.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.4.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.4.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.4.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.4.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.4.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.4.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.5.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.5.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.5.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.5.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.5.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.5.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.5.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.5.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.5.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.6.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.6.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.6.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.6.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.6.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.6.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.6.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.6.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.6.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.7.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.7.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.7.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.7.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.7.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.7.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.7.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.7.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.7.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.8.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.8.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.8.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.8.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.8.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.8.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.8.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.8.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.8.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.10.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.10.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.10.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.10.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.10.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.10.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.10.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.10.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.10.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.11.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.11.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.11.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.11.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.11.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.11.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.11.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.11.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.11.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.12.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.12.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.12.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.12.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.12.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.12.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.12.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.12.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.12.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.13.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.13.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.13.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.13.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.13.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.13.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.13.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.13.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.13.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.14.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.14.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.14.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.14.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.14.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.14.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.14.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.14.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.14.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.15.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.15.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.15.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.15.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.15.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.15.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.15.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.15.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.15.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.16.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.16.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.16.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.16.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.16.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.16.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.16.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.16.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.16.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.17.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.17.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.17.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.17.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.17.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.17.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.17.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.17.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.17.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.18.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.18.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.18.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.18.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.18.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.18.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.18.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.18.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.18.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.19.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.19.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.19.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.19.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.19.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.19.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.19.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.19.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.19.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.20.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.20.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.20.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.20.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.20.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.9.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.9.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.9.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.9.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.9.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.9.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.9.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.9.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.9.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.20.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.20.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.20.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.20.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.21.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.21.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.21.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.21.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.21.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.21.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.21.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.21.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.21.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.22.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.22.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.22.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.22.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.22.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.22.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.22.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.22.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.22.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.23.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.23.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.23.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.23.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.23.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.23.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.23.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.23.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.23.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.24.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.24.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.24.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.24.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.24.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.24.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.24.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.24.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.24.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.25.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.25.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.25.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.25.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.25.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.25.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.25.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.25.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.25.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.26.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.26.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.26.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.26.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.26.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.26.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.26.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.26.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.26.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.27.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.27.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.27.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.27.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.27.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.27.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.27.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.27.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.27.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.28.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.28.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.28.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.28.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.28.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.28.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.28.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.28.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.28.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.29.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.29.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.29.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.29.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.29.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.29.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.29.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.29.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.29.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.30.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.30.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.30.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.30.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.30.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.30.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.30.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.30.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.30.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.31.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.31.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
+      "INFO:hf-to-gguf:blk.31.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:blk.31.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.31.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
+      "INFO:hf-to-gguf:blk.31.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
+      "INFO:hf-to-gguf:output.weight,               torch.float16 --> F16, shape = {4096, 128256}\n",
+      "INFO:hf-to-gguf:blk.31.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:blk.31.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
+      "INFO:hf-to-gguf:blk.31.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:output_norm.weight,          torch.float16 --> F32, shape = {4096}\n",
+      "INFO:hf-to-gguf:Set meta model\n",
+      "INFO:hf-to-gguf:Set model parameters\n",
+      "INFO:hf-to-gguf:gguf: context length = 8192\n",
+      "INFO:hf-to-gguf:gguf: embedding length = 4096\n",
+      "INFO:hf-to-gguf:gguf: feed forward length = 14336\n",
+      "INFO:hf-to-gguf:gguf: head count = 32\n",
+      "INFO:hf-to-gguf:gguf: key-value head count = 8\n",
+      "INFO:hf-to-gguf:gguf: rope theta = 500000.0\n",
+      "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n",
+      "INFO:hf-to-gguf:gguf: file type = 1\n",
+      "INFO:hf-to-gguf:Set model quantization version\n",
+      "INFO:hf-to-gguf:Set model tokenizer\n",
+      "The tokenizer you are loading from 'c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\merged' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.\n",
+      "WARNING:gguf.vocab:Unknown separator token '<|begin_of_text|>' in TemplateProcessing<pair>\n",
+      "INFO:gguf.vocab:Adding 280147 merge(s).\n",
+      "INFO:gguf.vocab:Setting special token type bos to 128000\n",
+      "INFO:gguf.vocab:Setting special token type eos to 128009\n",
+      "INFO:gguf.vocab:Setting add_bos_token to True\n",
+      "INFO:gguf.vocab:Setting add_sep_token to False\n",
+      "INFO:gguf.vocab:Setting chat_template to {% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n",
+      "\n",
+      "'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n",
+      "\n",
+      "' }}{% endif %}\n",
+      "INFO:gguf.gguf_writer:Writing the following files:\n",
+      "INFO:gguf.gguf_writer:c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf: n_tensors = 291, total_size = 16.1G\n",
+      "\n",
+      "Writing:   0%|          | 0.00/16.1G [00:00<?, ?byte/s]\n",
+      "Writing:   7%|▋         | 1.05G/16.1G [00:02<00:35, 419Mbyte/s]\n",
+      "Writing:   7%|▋         | 1.17G/16.1G [00:02<00:37, 402Mbyte/s]\n",
+      "Writing:   8%|▊         | 1.29G/16.1G [00:03<00:36, 409Mbyte/s]\n",
+      "Writing:   9%|▊         | 1.40G/16.1G [00:03<00:35, 416Mbyte/s]\n",
+      "Writing:   9%|▉         | 1.48G/16.1G [00:03<00:33, 441Mbyte/s]\n",
+      "Writing:  10%|▉         | 1.60G/16.1G [00:03<00:31, 455Mbyte/s]\n",
+      "Writing:  11%|█         | 1.72G/16.1G [00:04<00:31, 454Mbyte/s]\n",
+      "Writing:  11%|█▏        | 1.84G/16.1G [00:04<00:31, 445Mbyte/s]\n",
+      "Writing:  12%|█▏        | 1.91G/16.1G [00:04<00:29, 480Mbyte/s]\n",
+      "Writing:  13%|█▎        | 2.04G/16.1G [00:04<00:29, 480Mbyte/s]\n",
+      "Writing:  13%|█▎        | 2.16G/16.1G [00:04<00:30, 456Mbyte/s]\n",
+      "Writing:  14%|█▍        | 2.28G/16.1G [00:05<00:30, 448Mbyte/s]\n",
+      "Writing:  15%|█▍        | 2.35G/16.1G [00:05<00:28, 488Mbyte/s]\n",
+      "Writing:  15%|█▌        | 2.48G/16.1G [00:05<00:27, 488Mbyte/s]\n",
+      "Writing:  16%|█▌        | 2.59G/16.1G [00:05<00:28, 472Mbyte/s]\n",
+      "Writing:  17%|█▋        | 2.71G/16.1G [00:06<00:29, 447Mbyte/s]\n",
+      "Writing:  17%|█▋        | 2.79G/16.1G [00:06<00:28, 470Mbyte/s]\n",
+      "Writing:  18%|█▊        | 2.91G/16.1G [00:06<00:28, 463Mbyte/s]\n",
+      "Writing:  19%|█▉        | 3.03G/16.1G [00:06<00:29, 448Mbyte/s]\n",
+      "Writing:  20%|█▉        | 3.15G/16.1G [00:07<00:28, 447Mbyte/s]\n",
+      "Writing:  20%|██        | 3.22G/16.1G [00:07<00:26, 479Mbyte/s]\n",
+      "Writing:  21%|██        | 3.35G/16.1G [00:07<00:27, 460Mbyte/s]\n",
+      "Writing:  22%|██▏       | 3.47G/16.1G [00:07<00:27, 454Mbyte/s]\n",
+      "Writing:  22%|██▏       | 3.58G/16.1G [00:08<00:27, 448Mbyte/s]\n",
+      "Writing:  23%|██▎       | 3.66G/16.1G [00:08<00:26, 474Mbyte/s]\n",
+      "Writing:  24%|██▎       | 3.79G/16.1G [00:08<00:26, 466Mbyte/s]\n",
+      "Writing:  24%|██▍       | 3.90G/16.1G [00:08<00:26, 452Mbyte/s]\n",
+      "Writing:  25%|██▌       | 4.02G/16.1G [00:09<00:27, 434Mbyte/s]\n",
+      "Writing:  26%|██▌       | 4.10G/16.1G [00:09<00:25, 469Mbyte/s]\n",
+      "Writing:  26%|██▋       | 4.22G/16.1G [00:09<00:25, 467Mbyte/s]\n",
+      "Writing:  27%|██▋       | 4.34G/16.1G [00:09<00:25, 464Mbyte/s]\n",
+      "Writing:  28%|██▊       | 4.46G/16.1G [00:09<00:25, 452Mbyte/s]\n",
+      "Writing:  28%|██▊       | 4.53G/16.1G [00:10<00:23, 488Mbyte/s]\n",
+      "Writing:  29%|██▉       | 4.66G/16.1G [00:10<00:23, 475Mbyte/s]\n",
+      "Writing:  30%|██▉       | 4.78G/16.1G [00:10<00:24, 469Mbyte/s]\n",
+      "Writing:  30%|███       | 4.89G/16.1G [00:10<00:24, 462Mbyte/s]\n",
+      "Writing:  31%|███       | 4.97G/16.1G [00:10<00:22, 500Mbyte/s]\n",
+      "Writing:  32%|███▏      | 5.09G/16.1G [00:11<00:22, 489Mbyte/s]\n",
+      "Writing:  32%|███▏      | 5.21G/16.1G [00:11<00:23, 466Mbyte/s]\n",
+      "Writing:  33%|███▎      | 5.33G/16.1G [00:11<00:25, 426Mbyte/s]\n",
+      "Writing:  34%|███▎      | 5.40G/16.1G [00:11<00:23, 458Mbyte/s]\n",
+      "Writing:  34%|███▍      | 5.53G/16.1G [00:12<00:22, 461Mbyte/s]\n",
+      "Writing:  35%|███▌      | 5.65G/16.1G [00:12<00:22, 458Mbyte/s]\n",
+      "Writing:  36%|███▌      | 5.77G/16.1G [00:12<00:22, 456Mbyte/s]\n",
+      "Writing:  36%|███▋      | 5.84G/16.1G [00:12<00:20, 495Mbyte/s]\n",
+      "Writing:  37%|███▋      | 5.97G/16.1G [00:13<00:20, 481Mbyte/s]\n",
+      "Writing:  38%|███▊      | 6.08G/16.1G [00:13<00:20, 477Mbyte/s]\n",
+      "Writing:  39%|███▊      | 6.20G/16.1G [00:13<00:21, 469Mbyte/s]\n",
+      "Writing:  39%|███▉      | 6.28G/16.1G [00:13<00:19, 492Mbyte/s]\n",
+      "Writing:  40%|███▉      | 6.40G/16.1G [00:14<00:20, 467Mbyte/s]\n",
+      "Writing:  41%|████      | 6.52G/16.1G [00:14<00:20, 455Mbyte/s]\n",
+      "Writing:  41%|████▏     | 6.64G/16.1G [00:14<00:21, 443Mbyte/s]\n",
+      "Writing:  42%|████▏     | 6.71G/16.1G [00:14<00:19, 479Mbyte/s]\n",
+      "Writing:  43%|████▎     | 6.84G/16.1G [00:15<00:19, 470Mbyte/s]\n",
+      "Writing:  43%|████▎     | 6.96G/16.1G [00:15<00:19, 463Mbyte/s]\n",
+      "Writing:  44%|████▍     | 7.07G/16.1G [00:15<00:19, 466Mbyte/s]\n",
+      "Writing:  45%|████▍     | 7.15G/16.1G [00:15<00:18, 494Mbyte/s]\n",
+      "Writing:  45%|████▌     | 7.28G/16.1G [00:15<00:18, 473Mbyte/s]\n",
+      "Writing:  46%|████▌     | 7.39G/16.1G [00:16<00:20, 425Mbyte/s]\n",
+      "Writing:  47%|████▋     | 7.51G/16.1G [00:17<00:31, 271Mbyte/s]\n",
+      "Writing:  47%|████▋     | 7.55G/16.1G [00:17<00:33, 255Mbyte/s]\n",
+      "Writing:  47%|████▋     | 7.59G/16.1G [00:17<00:34, 246Mbyte/s]\n",
+      "Writing:  48%|████▊     | 7.71G/16.1G [00:17<00:31, 268Mbyte/s]\n",
+      "Writing:  49%|████▊     | 7.83G/16.1G [00:18<00:26, 316Mbyte/s]\n",
+      "Writing:  49%|████▉     | 7.95G/16.1G [00:18<00:22, 355Mbyte/s]\n",
+      "Writing:  50%|████▉     | 8.02G/16.1G [00:18<00:19, 402Mbyte/s]\n",
+      "Writing:  51%|█████     | 8.15G/16.1G [00:18<00:18, 425Mbyte/s]\n",
+      "Writing:  51%|█████▏    | 8.27G/16.1G [00:19<00:18, 427Mbyte/s]\n",
+      "Writing:  52%|█████▏    | 8.38G/16.1G [00:19<00:17, 438Mbyte/s]\n",
+      "Writing:  53%|█████▎    | 8.46G/16.1G [00:19<00:15, 478Mbyte/s]\n",
+      "Writing:  53%|█████▎    | 8.58G/16.1G [00:19<00:16, 466Mbyte/s]\n",
+      "Writing:  54%|█████▍    | 8.70G/16.1G [00:19<00:16, 451Mbyte/s]\n",
+      "Writing:  55%|█████▍    | 8.82G/16.1G [00:20<00:16, 446Mbyte/s]\n",
+      "Writing:  55%|█████▌    | 8.89G/16.1G [00:20<00:15, 466Mbyte/s]\n",
+      "Writing:  56%|█████▌    | 9.02G/16.1G [00:20<00:14, 472Mbyte/s]\n",
+      "Writing:  57%|█████▋    | 9.14G/16.1G [00:20<00:14, 479Mbyte/s]\n",
+      "Writing:  58%|█████▊    | 9.26G/16.1G [00:21<00:14, 472Mbyte/s]\n",
+      "Writing:  58%|█████▊    | 9.33G/16.1G [00:21<00:13, 502Mbyte/s]\n",
+      "Writing:  59%|█████▉    | 9.46G/16.1G [00:21<00:13, 489Mbyte/s]\n",
+      "Writing:  59%|█████▉    | 9.53G/16.1G [00:21<00:12, 525Mbyte/s]\n",
+      "Writing:  60%|██████    | 9.66G/16.1G [00:21<00:13, 466Mbyte/s]\n",
+      "Writing:  61%|██████    | 9.78G/16.1G [00:22<00:13, 461Mbyte/s]\n",
+      "Writing:  62%|██████▏   | 9.89G/16.1G [00:22<00:13, 455Mbyte/s]\n",
+      "Writing:  62%|██████▏   | 9.97G/16.1G [00:22<00:12, 495Mbyte/s]\n",
+      "Writing:  63%|██████▎   | 10.1G/16.1G [00:22<00:12, 479Mbyte/s]\n",
+      "Writing:  64%|██████▎   | 10.2G/16.1G [00:23<00:13, 427Mbyte/s]\n",
+      "Writing:  64%|██████▍   | 10.3G/16.1G [00:23<00:13, 439Mbyte/s]\n",
+      "Writing:  65%|██████▌   | 10.4G/16.1G [00:23<00:12, 435Mbyte/s]\n",
+      "Writing:  66%|██████▌   | 10.6G/16.1G [00:23<00:12, 436Mbyte/s]\n",
+      "Writing:  66%|██████▌   | 10.6G/16.1G [00:24<00:11, 466Mbyte/s]\n",
+      "Writing:  67%|██████▋   | 10.8G/16.1G [00:24<00:11, 446Mbyte/s]\n",
+      "Writing:  68%|██████▊   | 10.9G/16.1G [00:24<00:11, 435Mbyte/s]\n",
+      "Writing:  68%|██████▊   | 11.0G/16.1G [00:24<00:12, 412Mbyte/s]\n",
+      "Writing:  69%|██████▉   | 11.1G/16.1G [00:25<00:11, 451Mbyte/s]\n",
+      "Writing:  70%|██████▉   | 11.2G/16.1G [00:25<00:10, 455Mbyte/s]\n",
+      "Writing:  70%|███████   | 11.3G/16.1G [00:25<00:10, 447Mbyte/s]\n",
+      "Writing:  71%|███████   | 11.4G/16.1G [00:25<00:10, 437Mbyte/s]\n",
+      "Writing:  72%|███████▏  | 11.5G/16.1G [00:26<00:09, 479Mbyte/s]\n",
+      "Writing:  72%|███████▏  | 11.6G/16.1G [00:26<00:09, 485Mbyte/s]\n",
+      "Writing:  73%|███████▎  | 11.8G/16.1G [00:26<00:09, 475Mbyte/s]\n",
+      "Writing:  74%|███████▍  | 11.9G/16.1G [00:26<00:09, 446Mbyte/s]\n",
+      "Writing:  74%|███████▍  | 11.9G/16.1G [00:26<00:08, 481Mbyte/s]\n",
+      "Writing:  75%|███████▌  | 12.1G/16.1G [00:27<00:08, 479Mbyte/s]\n",
+      "Writing:  76%|███████▌  | 12.2G/16.1G [00:27<00:08, 465Mbyte/s]\n",
+      "Writing:  77%|███████▋  | 12.3G/16.1G [00:27<00:08, 459Mbyte/s]\n",
+      "Writing:  77%|███████▋  | 12.4G/16.1G [00:27<00:07, 488Mbyte/s]\n",
+      "Writing:  78%|███████▊  | 12.5G/16.1G [00:28<00:07, 488Mbyte/s]\n",
+      "Writing:  79%|███████▊  | 12.6G/16.1G [00:28<00:07, 473Mbyte/s]\n",
+      "Writing:  79%|███████▉  | 12.7G/16.1G [00:28<00:07, 455Mbyte/s]\n",
+      "Writing:  80%|███████▉  | 12.8G/16.1G [00:28<00:06, 485Mbyte/s]\n",
+      "Writing:  81%|████████  | 12.9G/16.1G [00:29<00:06, 478Mbyte/s]\n",
+      "Writing:  81%|████████▏ | 13.1G/16.1G [00:29<00:06, 470Mbyte/s]\n",
+      "Writing:  82%|████████▏ | 13.2G/16.1G [00:29<00:06, 458Mbyte/s]\n",
+      "Writing:  83%|████████▎ | 13.3G/16.1G [00:29<00:05, 495Mbyte/s]\n",
+      "Writing:  83%|████████▎ | 13.4G/16.1G [00:29<00:05, 491Mbyte/s]\n",
+      "Writing:  84%|████████▍ | 13.5G/16.1G [00:30<00:05, 479Mbyte/s]\n",
+      "Writing:  85%|████████▍ | 13.6G/16.1G [00:30<00:05, 475Mbyte/s]\n",
+      "Writing:  85%|████████▌ | 13.7G/16.1G [00:30<00:04, 509Mbyte/s]\n",
+      "Writing:  86%|████████▌ | 13.8G/16.1G [00:30<00:04, 491Mbyte/s]\n",
+      "Writing:  87%|████████▋ | 13.9G/16.1G [00:31<00:04, 478Mbyte/s]\n",
+      "Writing:  88%|████████▊ | 14.1G/16.1G [00:31<00:04, 471Mbyte/s]\n",
+      "Writing:  88%|████████▊ | 14.1G/16.1G [00:31<00:03, 507Mbyte/s]\n",
+      "Writing:  89%|████████▉ | 14.3G/16.1G [00:31<00:03, 499Mbyte/s]\n",
+      "Writing:  89%|████████▉ | 14.4G/16.1G [00:31<00:03, 485Mbyte/s]\n",
+      "Writing:  90%|█████████ | 14.5G/16.1G [00:32<00:03, 480Mbyte/s]\n",
+      "Writing:  91%|█████████ | 14.6G/16.1G [00:32<00:03, 381Mbyte/s]\n",
+      "Writing:  91%|█████████▏| 14.7G/16.1G [00:33<00:03, 349Mbyte/s]\n",
+      "Writing:  92%|█████████▏| 14.8G/16.1G [00:33<00:04, 257Mbyte/s]\n",
+      "Writing:  93%|█████████▎| 14.9G/16.1G [00:33<00:04, 293Mbyte/s]\n",
+      "Writing:  99%|█████████▉| 15.9G/16.1G [00:36<00:00, 381Mbyte/s]\n",
+      "Writing: 100%|█████████▉| 16.1G/16.1G [00:36<00:00, 377Mbyte/s]\n",
+      "Writing: 100%|██████████| 16.1G/16.1G [00:36<00:00, 436Mbyte/s]\n",
+      "INFO:hf-to-gguf:Model successfully exported to c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python {REPO_LLAMA_DIR}/convert_hf_to_gguf.py {MERGE_DIR} --outfile {MODEL_DIR}/mymodel.gguf --outtype f16"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "da67e22b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "CompletedProcess(args=['c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/llama-b7658-bin-win-cuda-12.4-x64/llama-quantize.exe', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel.gguf', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel-q4km.gguf', 'Q4_K_M'], returncode=0)"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Quantize the f16 GGUF export to the Q4_K_M type with llama.cpp's llama-quantize.\n",
+    "original_model = MODEL_DIR + \"/mymodel.gguf\"\n",
+    "quantized_model = MODEL_DIR + \"/mymodel-q4km.gguf\"\n",
+    "quantize_exe = BUILD_LLAMA_DIR + \"/llama-quantize.exe\"\n",
+    "\n",
+    "# check=True raises CalledProcessError on a non-zero exit, so a failed run\n",
+    "# cannot leave a missing or partial .gguf for the next cell to load.\n",
+    "subprocess.run(\n",
+    "    [quantize_exe, original_model, quantized_model, \"Q4_K_M\"],\n",
+    "    check=True,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "74d10154",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " \n",
+      "\n",
+      "The Quantum Banana Index (QBI) is an imaginary concept that I came up with to represent the absurdity of trying to measure or quantify something as complex and abstract as consciousness. It's like trying to put a price tag on a banana - it doesn't make sense! But, if we were to imagine a way to do so...\n",
+      "\n",
+      "The Quantum Banana Index (QBI) is a hypothetical unit that attempts to capture the essence of human experience, emotions, thoughts, and sensations in a single numerical value. It's like trying to compress an entire library into a single book - it would be impossible!\n",
+      "\n",
+      "In this imaginary world, QBI values range from 0 to infinity, with higher numbers indicating more complex or intense experiences. For example:\n",
+      "\n",
+      "* A simple pleasure like eating a ripe banana might have a QBI of around 1-5.\n",
+      "* The experience of watching a beautiful sunset could have a QBI of 10-20.\n",
+      "* Falling in love for the first time might have a QBI of 50-100.\n",
+      "\n",
+      "The idea is that as our experiences become more complex, abstract, or profound, their corresponding QBI values increase. However, this concept is purely fictional and serves only to illustrate the futility of trying to quantify something so subjective and multifaceted as human consciousness.\n",
+      "\n",
+      "So, if someone asks you what your Quantum Banana Index is, just smile knowingly and say \"it's a banana-ty!\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the locally quantized model from MODEL_DIR; downloads are disabled.\n",
+    "model_path = MODEL_DIR\n",
+    "gptj = GPT4All(\n",
+    "    model_name=\"mymodel-q4km\",\n",
+    "    model_path=model_path,\n",
+    "    model_type=\"llama\",\n",
+    "    allow_download=False,\n",
+    ")\n",
+    "# Alternative domain prompt: \"Explain functional near-infrared spectroscopy (fNIRS) hardware components in detail.\" (max_tokens=1024)\n",
+    "response = gptj.generate(\"What is a Quantum Banana Index?\", max_tokens=512)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "35ea5aa5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "40"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import gc\n",
+    "del gptj  # drop the notebook's reference so the model can be garbage-collected\n",
+    "gc.collect()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/local-model-rag-implementation.ipynb b/notebooks/local-model-rag-implementation.ipynb
new file mode 100644
index 0000000..66bb316
--- /dev/null
+++ b/notebooks/local-model-rag-implementation.ipynb
@@ -0,0 +1,353 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "45d62106",
+   "metadata": {},
+   "source": [
+    "# Basic RAG Implementation with a local LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4c312410",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from gpt4all import GPT4All\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "from chromadb import PersistentClient\n",
+    "from docx import Document\n",
+    "\n",
+    "MODEL = \"Meta-Llama-3-8B-Instruct.Q4_0.gguf\"\n",
+    "CONTEXT_SIZE = 8192\n",
+    "EMBEDDER = \"all-MiniLM-L6-v2\"\n",
+    "RAG_PATH = \"./build/rag_db\"\n",
+    "DOCS_PATH = \"./build/documents/fNIRS_Glossary_Hardware.docx\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "90bae527",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "104f2001edc34aa5aff82734b3388041",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:143: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\nalab\\.cache\\huggingface\\hub\\models--sentence-transformers--all-MiniLM-L6-v2. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
+      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
+      "  warnings.warn(message)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7bf16ea40d964be19217eadc81f5674e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "32962e77048440908808689c5dc386e0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bf08ffecdfa94eaca2841e2b6b88eea5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6079ecdd0e464623a1d7e20999213213",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "60b2de9bec5c4237827d910660389db1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "05f352a112fb4ccd8968a7ffe335c80f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b5f7aa6547c0455eb55863ad8ec6c84f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.txt: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "43605d598a604c10a85effee5869939e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bd1a21fcccee4a92a50dcca08c858565",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6d409c5032674774bfe157e1ec21eb3a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Local LLM (CUDA), sentence-embedding model and persistent Chroma client.\n",
+    "model = GPT4All(model_name=MODEL, n_ctx=CONTEXT_SIZE, allow_download=True, device=\"cuda\")\n",
+    "embedder = SentenceTransformer(EMBEDDER)\n",
+    "client = PersistentClient(path=RAG_PATH)\n",
+    "\n",
+    "\n",
+    "class EmbeddingFunctionWrapper:\n",
+    "    def __init__(self, model):\n",
+    "        self.model = model\n",
+    "\n",
+    "    def name(self):\n",
+    "        return \"sentence-transformers\"\n",
+    "\n",
+    "    def __call__(self, input):\n",
+    "        if isinstance(input, str):\n",
+    "            texts = [input]\n",
+    "            embs = self.model.encode(texts).tolist()\n",
+    "            return embs[0]\n",
+    "        else:\n",
+    "            texts = list(input)\n",
+    "            return self.model.encode(texts).tolist()\n",
+    "\n",
+    "embedding_fn = EmbeddingFunctionWrapper(embedder)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "34efbc7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the .docx, cut it into fixed-size character chunks, then embed and store them.\n",
+    "doc = Document(DOCS_PATH)\n",
+    "docx_content = \"\\n\".join(p.text for p in doc.paragraphs if p.text.strip())\n",
+    "chunk_size = 1000  # characters per chunk\n",
+    "chunks = (docx_content[i:i + chunk_size] for i in range(0, len(docx_content), chunk_size))\n",
+    "documents = [chunk for chunk in chunks if chunk.strip()]\n",
+    "embeddings = embedder.encode(documents).tolist()\n",
+    "collection = client.get_or_create_collection(name=\"knowledge_base\", embedding_function=embedding_fn)\n",
+    "# upsert, not add: ids are fixed and the store persists, so a re-run must refresh the chunks.\n",
+    "collection.upsert(\n",
+    "    documents=documents,\n",
+    "    embeddings=embeddings,\n",
+    "    ids=[f\"doc{i}\" for i in range(len(documents))],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ed2cc1ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def retrieve(query, top_k=1):\n",
+    "    \"\"\"Return a list with the text of the `top_k` stored chunks closest to `query`.\"\"\"\n",
+    "    # Query by vector with the module-level embedder: the query is encoded exactly\n",
+    "    # once, and genuine Chroma errors surface instead of being swallowed by a broad\n",
+    "    # try/except fallback.\n",
+    "    query_embedding = embedder.encode([query]).tolist()[0]\n",
+    "    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)\n",
+    "    return results[\"documents\"][0]\n",
+    "\n",
+    "def rag_answer(query, top_k=1, max_context_length=500, max_tokens=200):\n",
+    "    \"\"\"Answer `query` with `model`, using up to `top_k` retrieved chunks as context.\"\"\"\n",
+    "    context = \"\\n\\n\".join(retrieve(query, top_k))\n",
+    "    # Character cap on the context; the 500 default is shorter than one 1000-char chunk.\n",
+    "    if len(context) > max_context_length:\n",
+    "        context = context[:max_context_length] + \"...\"\n",
+    "\n",
+    "    prompt = f\"\"\"\n",
+    "Use the context to answer the question.\n",
+    "Context:\n",
+    "{context}\n",
+    "Question:\n",
+    "{query}\n",
+    "Answer:\n",
+    "\"\"\"\n",
+    "    print(f\"Prompt length: {len(prompt)}\")\n",
+    "    return model.generate(prompt, max_tokens=max_tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "6fa9fd10",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of documents: 68\n",
+      "Document lengths: [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 63]\n",
+      "Retrieved docs length: 1\n",
+      "Prompt length: 627\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"What can Frequency domain multidistance NIRS estimate?\"\n",
+    "print(f\"Number of documents: {len(documents)}\")\n",
+    "print(f\"Document lengths: {list(map(len, documents))}\")  # sanity-check the chunking\n",
+    "retrieved = retrieve(query)  # retrieval on its own, to inspect the hit count\n",
+    "print(f\"Retrieved docs length: {len(retrieved)}\")\n",
+    "response = rag_answer(query)  # displayed by the next cell"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "5a82353e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Frequency-domain (FD) multidistance NIRS technique can estimate absolute values of absorption and scattering of the medium, and subsequently chromophore concentrations.'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/remote-agent-testing.ipynb b/notebooks/remote-agent-testing.ipynb
new file mode 100644
index 0000000..9bcf270
--- /dev/null
+++ b/notebooks/remote-agent-testing.ipynb
@@ -0,0 +1,393 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5133f8fa",
+   "metadata": {},
+   "source": [
+    "# Remote Agent Testing\n",
+    "Using Google GenAI (via LangChain) to test an agentic retrieval workflow with Gemini 2.5 Flash-Lite."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "62ec2147",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Imports\n",
+    "import os \n",
+    "from dotenv import load_dotenv\n",
+    "from langchain.agents import create_agent\n",
+    "from langchain.agents.middleware import dynamic_prompt, ModelRequest\n",
+    "from langchain.chat_models import init_chat_model\n",
+    "from langchain.tools import tool\n",
+    "from langchain_chroma import Chroma\n",
+    "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+    "\n",
+    "load_dotenv(os.path.join('', '..', '.env'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6dc525a1",
+   "metadata": {},
+   "source": [
+    "Initialize the Gemini 2.5 Flash-Lite chat model through LangChain's Google Generative AI integration; it drives the agentic workflow tested below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a401cf8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "model = init_chat_model(\"google_genai:gemini-2.5-flash-lite\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aaa68979",
+   "metadata": {},
+   "source": [
+    "Set up the embeddings model (`gemini-embedding-001`) used to embed the document chunks and the search queries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "45805907",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "embeddings = GoogleGenerativeAIEmbeddings(model=\"models/gemini-embedding-001\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3f90586",
+   "metadata": {},
+   "source": [
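+    "Set up a persistent Chroma vector store for storing and retrieving the embedded document chunks. Note: because the collection is persisted to disk, re-running the `add_documents` cell likely inserts the same chunks again, which would explain identical chunks appearing more than once in `similarity_search` results."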
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "500f90f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vector_store = Chroma(\n",
+    "    collection_name=\"example_collection\",\n",
+    "    embedding_function=embeddings,\n",
+    "    persist_directory=\"./build/langchain_db\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d4ff7ec0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6,900 pages later… *“This story is just for that one reader.”*  \n",
+      "*Omniscient Reader’s Viewpoint* is probably one of the most ambitious epics I’ve ever read in this genre. Regression-themed novels are already a flooded trope, but this one blows the rest out of the water purely from how many layers it stacks on top of itself and still manages to come out narratively clean. When I first got into this series (via the webtoon, like most people), the wait between weekly releases drove me up the wall,\n",
+      "Total characters: 8578\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "from langchain_core.documents import Document\n",
+    "\n",
+    "response = requests.get(\"https://viswamedha.com/api/post/a-story-for-one-reader/\")\n",
+    "data = response.json()\n",
+    "content = data['content']\n",
+    "\n",
+    "docs = [Document(page_content=content, metadata={\"source\": response.url})]\n",
+    "\n",
+    "assert len(docs) == 1\n",
+    "print(docs[0].page_content[:500])\n",
+    "print(f\"Total characters: {len(docs[0].page_content)}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "82bcfabc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Split blog post into 13 sub-documents.\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "text_splitter = RecursiveCharacterTextSplitter(\n",
+    "    chunk_size=1000,  \n",
+    "    chunk_overlap=200, \n",
+    "    add_start_index=True,\n",
+    ")\n",
+    "all_splits = text_splitter.split_documents(docs)\n",
+    "\n",
+    "print(f\"Split blog post into {len(all_splits)} sub-documents.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2ee1a9ca",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['44706f38-6bd0-4e9a-8a41-d27790bdddc8', '9d2a2300-a311-4389-86b8-71eef221186c', '3970098b-f681-47bb-8c1d-6929cb67b537']\n"
+     ]
+    }
+   ],
+   "source": [
+    "document_ids = vector_store.add_documents(documents=all_splits)\n",
+    "\n",
+    "print(document_ids[:3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a9096893",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "@tool(response_format=\"content_and_artifact\")\n",
+    "def retrieve_context(query: str):\n",
+    "    \"\"\"Retrieve information to help answer a query.\"\"\"\n",
+    "    retrieved_docs = vector_store.similarity_search(query, k=2)\n",
+    "    serialized = \"\\n\\n\".join(\n",
+    "        (f\"Source: {doc.metadata}\\nContent: {doc.page_content}\")\n",
+    "        for doc in retrieved_docs\n",
+    "    )\n",
+    "    return serialized, retrieved_docs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "dff2345d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "tools = [retrieve_context]\n",
+    "prompt = (\n",
+    "    \"You have access to a tool that retrieves context from a blog post. \"\n",
+    "    \"Use the tool to help answer user queries.\"\n",
+    ")\n",
+    "agent = create_agent(model, tools, system_prompt=prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "aaa2fad9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "What is the significance of the second loop?\n",
+      "\n",
+      "Use the retrieved context to provide a detailed answer.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  retrieve_context (b6c5ce4e-a030-47cf-8fed-f1279f022766)\n",
+      " Call ID: b6c5ce4e-a030-47cf-8fed-f1279f022766\n",
+      "  Args:\n",
+      "    query: Significance of the second loop\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: retrieve_context\n",
+      "\n",
+      "Source: {'start_index': 3377, 'source': 'https://viswamedha.com/api/post/a-story-for-one-reader/'}\n",
+      "Content: And this is where the paradox really hits. The Great Plotter, while observing regressions and chasing a better ending, ends up **creating the very timeline** he’s been watching. In trying to fix his own story, he triggers a new one. He unknowingly causes the very events that lead to KDJ’s worldline existing in the first place. It's absolutely wild. He becomes the most influential figure in this timeline, yet completely powerless to interact with it directly (due to the constraints of Probability). All he can do is watch as KDJ lives through the story he thought he already knew.\n",
+      "\n",
+      "---\n",
+      "\n",
+      "## What is the second paradox, and where does the loop begin?\n",
+      "\n",
+      "Source: {'start_index': 3377, 'source': 'https://viswamedha.com/api/post/a-story-for-one-reader/'}\n",
+      "Content: And this is where the paradox really hits. The Great Plotter, while observing regressions and chasing a better ending, ends up **creating the very timeline** he’s been watching. In trying to fix his own story, he triggers a new one. He unknowingly causes the very events that lead to KDJ’s worldline existing in the first place. It's absolutely wild. He becomes the most influential figure in this timeline, yet completely powerless to interact with it directly (due to the constraints of Probability). All he can do is watch as KDJ lives through the story he thought he already knew.\n",
+      "\n",
+      "---\n",
+      "\n",
+      "## What is the second paradox, and where does the loop begin?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "The second loop is significant because it represents a paradox where the Great Plotter, in his attempt to alter his own story and create a better ending, inadvertently becomes the catalyst for the very timeline he is observing. He ends up creating the timeline he has been watching, triggering new events, and causing the existence of KDJ's worldline. Despite being the most influential figure in this new timeline, the Great Plotter is powerless to intervene directly and can only watch as KDJ experiences the story.\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = (\n",
+    "    \"What is the significance of the second loop?\\n\\n\"\n",
+    "    \"Use the retrieved context to provide a detailed answer.\"\n",
+    ")\n",
+    "\n",
+    "for event in agent.stream(\n",
+    "    {\"messages\": [{\"role\": \"user\", \"content\": query}]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    event[\"messages\"][-1].pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "bda6d7d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "@dynamic_prompt\n",
+    "def prompt_with_context(request: ModelRequest) -> str:\n",
+    "    \"\"\"Inject context into state messages.\"\"\"\n",
+    "    last_query = request.state[\"messages\"][-1].text\n",
+    "    retrieved_docs = vector_store.similarity_search(last_query)\n",
+    "\n",
+    "    docs_content = \"\\n\\n\".join(doc.page_content for doc in retrieved_docs)\n",
+    "\n",
+    "    system_message = (\n",
+    "        \"You are a helpful assistant. Use the following context in your response:\"\n",
+    "        f\"\\n\\n{docs_content}\"\n",
+    "    )\n",
+    "\n",
+    "    return system_message\n",
+    "\n",
+    "\n",
+    "agent = create_agent(model, tools=[], middleware=[prompt_with_context])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1540855c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'messages': [HumanMessage(content='What is the significance of the second loop?\\n\\n', additional_kwargs={}, response_metadata={}, id='eaca10e5-a350-4ad8-80ad-c62645b69e5a')]}\n",
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "What is the significance of the second loop?\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "ename": "GoogleGenerativeAIError",
+     "evalue": "Error embedding content: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mServerError\u001b[39m                               Traceback (most recent call last)",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_google_genai\\embeddings.py:480\u001b[39m, in \u001b[36mGoogleGenerativeAIEmbeddings.embed_query\u001b[39m\u001b[34m(self, text, task_type, title, output_dimensionality)\u001b[39m\n\u001b[32m    479\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m480\u001b[39m     result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmodels\u001b[49m\u001b[43m.\u001b[49m\u001b[43membed_content\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    481\u001b[39m \u001b[43m        \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    482\u001b[39m \u001b[43m        \u001b[49m\u001b[43mcontents\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    483\u001b[39m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    484\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    485\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\models.py:4179\u001b[39m, in \u001b[36mModels.embed_content\u001b[39m\u001b[34m(self, model, contents, config)\u001b[39m\n\u001b[32m   4177\u001b[39m request_dict = _common.encode_unserializable_types(request_dict)\n\u001b[32m-> \u001b[39m\u001b[32m4179\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_api_client\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   4180\u001b[39m \u001b[43m    \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mpost\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequest_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_options\u001b[49m\n\u001b[32m   4181\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   4183\u001b[39m response_dict = {} \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m response.body \u001b[38;5;28;01melse\u001b[39;00m json.loads(response.body)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1386\u001b[39m, in \u001b[36mBaseApiClient.request\u001b[39m\u001b[34m(self, http_method, path, request_dict, http_options)\u001b[39m\n\u001b[32m   1383\u001b[39m http_request = \u001b[38;5;28mself\u001b[39m._build_request(\n\u001b[32m   1384\u001b[39m     http_method, path, request_dict, http_options\n\u001b[32m   1385\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1386\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhttp_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[32m   1387\u001b[39m response_body = (\n\u001b[32m   1388\u001b[39m     response.response_stream[\u001b[32m0\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m response.response_stream \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m'\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m   1389\u001b[39m )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1222\u001b[39m, in \u001b[36mBaseApiClient._request\u001b[39m\u001b[34m(self, http_request, http_options, stream)\u001b[39m\n\u001b[32m   1220\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m retry(\u001b[38;5;28mself\u001b[39m._request_once, http_request, stream)  \u001b[38;5;66;03m# type: ignore[no-any-return]\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1222\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_request_once\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhttp_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:477\u001b[39m, in \u001b[36mRetrying.__call__\u001b[39m\u001b[34m(self, fn, *args, **kwargs)\u001b[39m\n\u001b[32m    476\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m477\u001b[39m     do = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43miter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    478\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(do, DoAttempt):\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:378\u001b[39m, in \u001b[36mBaseRetrying.iter\u001b[39m\u001b[34m(self, retry_state)\u001b[39m\n\u001b[32m    377\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m action \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.iter_state.actions:\n\u001b[32m--> \u001b[39m\u001b[32m378\u001b[39m     result = \u001b[43maction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    379\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:420\u001b[39m, in \u001b[36mBaseRetrying._post_stop_check_actions.<locals>.exc_check\u001b[39m\u001b[34m(rs)\u001b[39m\n\u001b[32m    419\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.reraise:\n\u001b[32m--> \u001b[39m\u001b[32m420\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mretry_exc\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    421\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m retry_exc \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mfut\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mexception\u001b[39;00m()\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:187\u001b[39m, in \u001b[36mRetryError.reraise\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    186\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.last_attempt.failed:\n\u001b[32m--> \u001b[39m\u001b[32m187\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mlast_attempt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.13_3.13.2544.0_x64__qbz5n2kfra8p0\\Lib\\concurrent\\futures\\_base.py:449\u001b[39m, in \u001b[36mFuture.result\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m    448\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._state == FINISHED:\n\u001b[32m--> \u001b[39m\u001b[32m449\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    451\u001b[39m \u001b[38;5;28mself\u001b[39m._condition.wait(timeout)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.13_3.13.2544.0_x64__qbz5n2kfra8p0\\Lib\\concurrent\\futures\\_base.py:401\u001b[39m, in \u001b[36mFuture.__get_result\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    400\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m401\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m._exception\n\u001b[32m    402\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m    403\u001b[39m     \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\tenacity\\__init__.py:480\u001b[39m, in \u001b[36mRetrying.__call__\u001b[39m\u001b[34m(self, fn, *args, **kwargs)\u001b[39m\n\u001b[32m    479\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m480\u001b[39m     result = \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    481\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m:  \u001b[38;5;66;03m# noqa: B902\u001b[39;00m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\_api_client.py:1199\u001b[39m, in \u001b[36mBaseApiClient._request_once\u001b[39m\u001b[34m(self, http_request, stream)\u001b[39m\n\u001b[32m   1192\u001b[39m response = \u001b[38;5;28mself\u001b[39m._httpx_client.request(\n\u001b[32m   1193\u001b[39m     method=http_request.method,\n\u001b[32m   1194\u001b[39m     url=http_request.url,\n\u001b[32m   (...)\u001b[39m\u001b[32m   1197\u001b[39m     timeout=http_request.timeout,\n\u001b[32m   1198\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1199\u001b[39m \u001b[43merrors\u001b[49m\u001b[43m.\u001b[49m\u001b[43mAPIError\u001b[49m\u001b[43m.\u001b[49m\u001b[43mraise_for_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1200\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m HttpResponse(\n\u001b[32m   1201\u001b[39m     response.headers, response \u001b[38;5;28;01mif\u001b[39;00m stream \u001b[38;5;28;01melse\u001b[39;00m [response.text]\n\u001b[32m   1202\u001b[39m )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\errors.py:121\u001b[39m, in \u001b[36mAPIError.raise_for_response\u001b[39m\u001b[34m(cls, response)\u001b[39m\n\u001b[32m    119\u001b[39m   response_json = response.body_segments[\u001b[32m0\u001b[39m].get(\u001b[33m'\u001b[39m\u001b[33merror\u001b[39m\u001b[33m'\u001b[39m, {})\n\u001b[32m--> \u001b[39m\u001b[32m121\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mraise_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstatus_code\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_json\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\google\\genai\\errors.py:148\u001b[39m, in \u001b[36mAPIError.raise_error\u001b[39m\u001b[34m(cls, status_code, response_json, response)\u001b[39m\n\u001b[32m    147\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[32m500\u001b[39m <= status_code < \u001b[32m600\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m148\u001b[39m   \u001b[38;5;28;01mraise\u001b[39;00m ServerError(status_code, response_json, response)\n\u001b[32m    149\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[31mServerError\u001b[39m: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[31mGoogleGenerativeAIError\u001b[39m                   Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[13]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43magent\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m      2\u001b[39m \u001b[43m    \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m      3\u001b[39m \u001b[43m    \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mvalues\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m      4\u001b[39m \u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m      5\u001b[39m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mimport\u001b[39;49;00m\u001b[38;5;250;43m \u001b[39;49m\u001b[34;43;01mpprint\u001b[39;49;00m\n\u001b[32m      6\u001b[39m \u001b[43m    \u001b[49m\u001b[43mpprint\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m)\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# inspect the event structure\u001b[39;49;00m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\main.py:2646\u001b[39m, in \u001b[36mPregel.stream\u001b[39m\u001b[34m(self, input, config, context, stream_mode, print_mode, output_keys, interrupt_before, interrupt_after, durability, subgraphs, debug, **kwargs)\u001b[39m\n\u001b[32m   2644\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m loop.match_cached_writes():\n\u001b[32m   2645\u001b[39m     loop.output_writes(task.id, task.writes, cached=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m-> \u001b[39m\u001b[32m2646\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtick\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   2647\u001b[39m \u001b[43m    \u001b[49m\u001b[43m[\u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtasks\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrites\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   2648\u001b[39m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstep_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   2649\u001b[39m \u001b[43m    \u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m=\u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m  
 2650\u001b[39m \u001b[43m    \u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43maccept_push\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   2651\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m   2652\u001b[39m \u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# emit output\u001b[39;49;00m\n\u001b[32m   2653\u001b[39m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01myield from\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_output\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   2654\u001b[39m \u001b[43m        \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprint_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubgraphs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mqueue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mEmpty\u001b[49m\n\u001b[32m   2655\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   2656\u001b[39m loop.after_tick()\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\_runner.py:167\u001b[39m, in \u001b[36mPregelRunner.tick\u001b[39m\u001b[34m(self, tasks, reraise, timeout, retry_policy, get_waiter, schedule_task)\u001b[39m\n\u001b[32m    165\u001b[39m t = tasks[\u001b[32m0\u001b[39m]\n\u001b[32m    166\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m167\u001b[39m     \u001b[43mrun_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    168\u001b[39m \u001b[43m        \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    169\u001b[39m \u001b[43m        \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    170\u001b[39m \u001b[43m        \u001b[49m\u001b[43mconfigurable\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m    171\u001b[39m \u001b[43m            \u001b[49m\u001b[43mCONFIG_KEY_CALL\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    172\u001b[39m \u001b[43m                \u001b[49m\u001b[43m_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    173\u001b[39m \u001b[43m                \u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    174\u001b[39m \u001b[43m                \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    175\u001b[39m \u001b[43m                \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m=\u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    176\u001b[39m \u001b[43m                
\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    177\u001b[39m \u001b[43m                \u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    178\u001b[39m \u001b[43m            \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    179\u001b[39m \u001b[43m        \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    180\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    181\u001b[39m     \u001b[38;5;28mself\u001b[39m.commit(t, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m    182\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\pregel\\_retry.py:42\u001b[39m, in \u001b[36mrun_with_retry\u001b[39m\u001b[34m(task, retry_policy, configurable)\u001b[39m\n\u001b[32m     40\u001b[39m     task.writes.clear()\n\u001b[32m     41\u001b[39m     \u001b[38;5;66;03m# run the task\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43mproc\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     43\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ParentCommand \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m     44\u001b[39m     ns: \u001b[38;5;28mstr\u001b[39m = config[CONF][CONFIG_KEY_CHECKPOINT_NS]\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\_internal\\_runnable.py:656\u001b[39m, in \u001b[36mRunnableSeq.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m    654\u001b[39m     \u001b[38;5;66;03m# run in context\u001b[39;00m\n\u001b[32m    655\u001b[39m     \u001b[38;5;28;01mwith\u001b[39;00m set_config_context(config, run) \u001b[38;5;28;01mas\u001b[39;00m context:\n\u001b[32m--> \u001b[39m\u001b[32m656\u001b[39m         \u001b[38;5;28minput\u001b[39m = \u001b[43mcontext\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    657\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    658\u001b[39m     \u001b[38;5;28minput\u001b[39m = step.invoke(\u001b[38;5;28minput\u001b[39m, config)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langgraph\\_internal\\_runnable.py:400\u001b[39m, in \u001b[36mRunnableCallable.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m    398\u001b[39m         run_manager.on_chain_end(ret)\n\u001b[32m    399\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m400\u001b[39m     ret = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    401\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.recurse \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ret, Runnable):\n\u001b[32m    402\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m ret.invoke(\u001b[38;5;28minput\u001b[39m, config)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\factory.py:1144\u001b[39m, in \u001b[36mcreate_agent.<locals>.model_node\u001b[39m\u001b[34m(state, runtime)\u001b[39m\n\u001b[32m   1141\u001b[39m     response = _execute_model_sync(request)\n\u001b[32m   1142\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m   1143\u001b[39m     \u001b[38;5;66;03m# Call composed handler with base handler\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1144\u001b[39m     response = \u001b[43mwrap_model_call_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_execute_model_sync\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1146\u001b[39m \u001b[38;5;66;03m# Extract state updates from ModelResponse\u001b[39;00m\n\u001b[32m   1147\u001b[39m state_updates = {\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m: response.result}\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\factory.py:146\u001b[39m, in \u001b[36m_chain_model_call_handlers.<locals>.normalized_single\u001b[39m\u001b[34m(request, handler)\u001b[39m\n\u001b[32m    142\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mnormalized_single\u001b[39m(\n\u001b[32m    143\u001b[39m     request: ModelRequest,\n\u001b[32m    144\u001b[39m     handler: Callable[[ModelRequest], ModelResponse],\n\u001b[32m    145\u001b[39m ) -> ModelResponse:\n\u001b[32m--> \u001b[39m\u001b[32m146\u001b[39m     result = \u001b[43msingle_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhandler\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    147\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m _normalize_to_model_response(result)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain\\agents\\middleware\\types.py:1656\u001b[39m, in \u001b[36mdynamic_prompt.<locals>.decorator.<locals>.wrapped\u001b[39m\u001b[34m(_self, request, handler)\u001b[39m\n\u001b[32m   1651\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mwrapped\u001b[39m(\n\u001b[32m   1652\u001b[39m     _self: AgentMiddleware[StateT, ContextT],\n\u001b[32m   1653\u001b[39m     request: ModelRequest,\n\u001b[32m   1654\u001b[39m     handler: Callable[[ModelRequest], ModelResponse],\n\u001b[32m   1655\u001b[39m ) -> ModelCallResult:\n\u001b[32m-> \u001b[39m\u001b[32m1656\u001b[39m     prompt = \u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mCallable[[ModelRequest], SystemMessage | str]\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1657\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(prompt, SystemMessage):\n\u001b[32m   1658\u001b[39m         request = request.override(system_message=prompt)\n",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 5\u001b[39m, in \u001b[36mprompt_with_context\u001b[39m\u001b[34m(request)\u001b[39m\n\u001b[32m      3\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Inject context into state messages.\"\"\"\u001b[39;00m\n\u001b[32m      4\u001b[39m last_query = request.state[\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m][-\u001b[32m1\u001b[39m].text\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m retrieved_docs = \u001b[43mvector_store\u001b[49m\u001b[43m.\u001b[49m\u001b[43msimilarity_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlast_query\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m      7\u001b[39m docs_content = \u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m.join(doc.page_content \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m retrieved_docs)\n\u001b[32m      9\u001b[39m system_message = (\n\u001b[32m     10\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mYou are a helpful assistant. Use the following context in your response:\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     11\u001b[39m     \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mdocs_content\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m     12\u001b[39m )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_chroma\\vectorstores.py:748\u001b[39m, in \u001b[36mChroma.similarity_search\u001b[39m\u001b[34m(self, query, k, filter, **kwargs)\u001b[39m\n\u001b[32m    730\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34msimilarity_search\u001b[39m(\n\u001b[32m    731\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m    732\u001b[39m     query: \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m    735\u001b[39m     **kwargs: Any,\n\u001b[32m    736\u001b[39m ) -> \u001b[38;5;28mlist\u001b[39m[Document]:\n\u001b[32m    737\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Run similarity search with Chroma.\u001b[39;00m\n\u001b[32m    738\u001b[39m \n\u001b[32m    739\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m    746\u001b[39m \u001b[33;03m        List of documents most similar to the query text.\u001b[39;00m\n\u001b[32m    747\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m748\u001b[39m     docs_and_scores = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msimilarity_search_with_score\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    749\u001b[39m \u001b[43m        \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    750\u001b[39m \u001b[43m        \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    751\u001b[39m \u001b[43m        \u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m    752\u001b[39m \u001b[43m        \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    753\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    754\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m [doc 
\u001b[38;5;28;01mfor\u001b[39;00m doc, _ \u001b[38;5;129;01min\u001b[39;00m docs_and_scores]\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_chroma\\vectorstores.py:848\u001b[39m, in \u001b[36mChroma.similarity_search_with_score\u001b[39m\u001b[34m(self, query, k, filter, where_document, **kwargs)\u001b[39m\n\u001b[32m    840\u001b[39m     results = \u001b[38;5;28mself\u001b[39m.__query_collection(\n\u001b[32m    841\u001b[39m         query_texts=[query],\n\u001b[32m    842\u001b[39m         n_results=k,\n\u001b[32m   (...)\u001b[39m\u001b[32m    845\u001b[39m         **kwargs,\n\u001b[32m    846\u001b[39m     )\n\u001b[32m    847\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m848\u001b[39m     query_embedding = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_embedding_function\u001b[49m\u001b[43m.\u001b[49m\u001b[43membed_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    849\u001b[39m     results = \u001b[38;5;28mself\u001b[39m.__query_collection(\n\u001b[32m    850\u001b[39m         query_embeddings=[query_embedding],\n\u001b[32m    851\u001b[39m         n_results=k,\n\u001b[32m   (...)\u001b[39m\u001b[32m    854\u001b[39m         **kwargs,\n\u001b[32m    855\u001b[39m     )\n\u001b[32m    857\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m _results_to_docs_and_scores(results)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\langchain_google_genai\\embeddings.py:490\u001b[39m, in \u001b[36mGoogleGenerativeAIEmbeddings.embed_query\u001b[39m\u001b[34m(self, text, task_type, title, output_dimensionality)\u001b[39m\n\u001b[32m    488\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m    489\u001b[39m     msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mError embedding content: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m490\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m GoogleGenerativeAIError(msg) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m    492\u001b[39m \u001b[38;5;66;03m# Single text returns single embedding\u001b[39;00m\n\u001b[32m    493\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(result.embeddings[\u001b[32m0\u001b[39m].values)\n",
+      "\u001b[31mGoogleGenerativeAIError\u001b[39m: Error embedding content: 500 INTERNAL. {'error': {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}}",
+      "During task with name 'model' and id '2df4c75f-65ba-cd3e-b448-0ed95a7614f8'"
+     ]
+    }
+   ],
+   "source": [
+    "# Ask the agent one question and pretty-print the last message of each streamed step.\n",
+    "query = \"What is the significance of the second loop?\\n\\n\"\n",
+    "agent_input = {\"messages\": [{\"role\": \"user\", \"content\": query}]}\n",
+    "stream = agent.stream(agent_input, stream_mode=\"values\")\n",
+    "for snapshot in stream:\n",
+    "    snapshot[\"messages\"][-1].pretty_print()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements/notebooks.txt b/requirements/notebooks.txt
new file mode 100644
index 0000000..b305234
--- /dev/null
+++ b/requirements/notebooks.txt
@@ -0,0 +1,102 @@
+# PyTorch wheel index that serves the +cu130 builds of torch/torchaudio/torchvision pinned below.
+--extra-index-url https://download.pytorch.org/whl/cu130
+accelerate==1.12.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.3
+aiosignal==1.4.0
+anyio==4.12.1
+asttokens==3.0.1
+attrs==25.4.0
+bitsandbytes==0.49.1
+certifi==2026.1.4
+charset-normalizer==3.4.4
+colorama==0.4.6
+comm==0.2.3
+datasets==4.4.2
+debugpy==1.8.19
+decorator==5.2.1
+dill==0.4.0
+executing==2.2.1
+filelock==3.20.3
+frozenlist==1.8.0
+fsspec==2025.10.0
+gpt4all[cuda]==2.8.2
+h11==0.16.0
+hf-xet==1.2.0
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.36.0
+idna==3.11
+IProgress==0.4
+ipykernel==7.1.0
+ipython==9.9.0
+ipython_pygments_lexers==1.1.1
+ipywidgets==8.1.8
+jedi==0.19.2
+Jinja2==3.1.6
+joblib==1.5.3
+jupyter_client==8.8.0
+jupyter_core==5.9.1
+jupyterlab_widgets==3.0.16
+lxml==6.0.2
+MarkupSafe==3.0.3
+matplotlib-inline==0.2.1
+mpmath==1.3.0
+multidict==6.7.0
+multiprocess==0.70.18
+nest-asyncio==1.6.0
+networkx==3.6.1
+numpy==2.4.0
+packaging==25.0
+pandas==2.3.3
+parso==0.8.5
+peft==0.18.1
+pillow==12.1.0
+platformdirs==4.5.1
+prompt_toolkit==3.0.52
+propcache==0.4.1
+psutil==7.2.1
+pure_eval==0.2.3
+pyarrow==22.0.0
+Pygments==2.19.2
+python-dateutil==2.9.0.post0
+python-docx==1.2.0
+pytz==2025.2
+PyYAML==6.0.3
+pyzmq==27.1.0
+regex==2025.11.3
+requests==2.32.5
+safetensors==0.7.0
+scikit-learn==1.8.0
+scipy==1.16.3
+sentence-transformers==5.2.0
+sentencepiece==0.2.1
+setuptools==80.9.0
+six==1.17.0
+stack-data==0.6.3
+sympy==1.13.1
+threadpoolctl==3.6.0
+tokenizers==0.22.2
+torch==2.9.1+cu130
+torchaudio==2.9.1+cu130
+torchvision==0.24.1+cu130
+tornado==6.5.4
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.57.3
+trl==0.26.2
+typing_extensions==4.15.0
+tzdata==2025.3
+urllib3==2.6.3
+wcwidth==0.2.14
+widgetsnbextension==4.0.15
+xxhash==3.6.0
+yarl==1.22.0
+
+# Vector-store / LangChain packages that are not part of the pinned list above.
+chromadb==1.4.1
+langchain==1.2.6
+langchain-google-genai==4.2.0
+langchain_chroma==1.1.0
+# NOTE(review): langchain-community is the only unpinned entry; pin it once the working version is confirmed.
+langchain-community