Dynavera/notebooks/fine-tune-local-model.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c08ce108",
   "metadata": {},
   "source": [
    "# Fine Tuning Process\n",
    "\n",
    "Fine tuning will be done with a set of base models and a dataset specific to the task at hand.\n",
    "\n",
    "The process should follow the core steps below:\n",
    "1. **Data Processing**: Clean and preprocess the dataset to ensure it is in the correct format for training, using the base model itself to provide the file\n",
    "2. **Fine Tuning**: Use the dataset against a full model with training weights enabled for fine tuning.\n",
    "3. **Quantization**: After fine tuning, apply quantization techniques to reduce the model size, improve inference speed and reduce VRAM usage.\n",
    "4. **Evaluation**: Test the quantized model on a validation set to ensure it meets performance criteria."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f782711b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_DISABLE_BF16\"] = \"1\"\n",
    "os.environ[\"TORCH_CUDA_ALLOW_BF16_REDUCED_PRECISION_REDUCTION\"] = \"0\"\n",
    "os.environ[\"ACCELERATE_DISABLE_FP16\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7d6fe75f",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from docx import Document\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "from gpt4all import GPT4All\n",
    "import subprocess\n",
    "from peft import PeftModel, LoraConfig\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "from trl import SFTTrainer\n",
    "import uuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e6b32a63",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "BUILD_DIR = os.path.abspath('') + \"/build\"\n",
    "os.makedirs(BUILD_DIR, exist_ok=True)\n",
    "FRESH_DIR = BUILD_DIR + f\"/{uuid.uuid4()}\"\n",
    "os.makedirs(FRESH_DIR, exist_ok=True)\n",
    "MODEL_DIR = FRESH_DIR + \"/models\"\n",
    "os.makedirs(MODEL_DIR, exist_ok=True)\n",
    "DATA_DIR = FRESH_DIR + \"/data\"\n",
    "os.makedirs(DATA_DIR, exist_ok=True)\n",
    "MERGE_DIR = FRESH_DIR + \"/merged\"\n",
    "os.makedirs(MERGE_DIR, exist_ok=True)\n",
    "CHUNK_DIR = FRESH_DIR + \"/chunks\"\n",
    "os.makedirs(CHUNK_DIR, exist_ok=True)\n",
    "FRESH_DIR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "64b1a7cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "BUILD_LLAMA_DIR = BUILD_DIR + \"/llama-b7658-bin-win-cuda-12.4-x64\"\n",
    "REPO_LLAMA_DIR = BUILD_DIR + \"/llama.cpp\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ff1e55da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total entries extracted: 84\n",
      "First entry:\n",
      "term: 3D-Digitizer\n",
      "Definition: A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world ob...\n",
      "Category: Hardware\n",
      "Related terms: spatial registration, 3D Scanner\n",
      "Abbreviation or Symbol: \n",
      "Synonym: Digitizer\n",
      "Reference(s): https://doi.org/10.1016/j.neuroimage.2005.05.019, https://doi.org/10.1109/EMBC.2013.6611270 https://...\n"
     ]
    }
   ],
   "source": [
    "DOCS_PATH = \"./build/documents/fNIRS_Glossary_Hardware.docx\"\n",
    "\n",
    "doc = Document(DOCS_PATH)\n",
    "\n",
    "lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]\n",
    "\n",
    "start_idx = 0\n",
    "for i, line in enumerate(lines):\n",
    "    if \"fNIRS Glossary of Hardware Terms: A - Z\" in line:\n",
    "        start_idx = i + 1\n",
    "        break\n",
    "glossary_lines = lines[start_idx:]\n",
    "\n",
    "keys = [\n",
    "    \"Definition:\", \"Category:\", \"Related terms:\", \"Abbreviation or Symbol:\",\n",
    "    \"Synonym:\", \"Reference(s):\", \"Alternative definition:\",\n",
    "    \"Related terms to alternative:\", \"Reference(s) for alternative:\",\n",
    "    \"Originally drafted by:\", \"Reviewed (or Edited) by:\", \"Status:\"\n",
    "]\n",
    "\n",
    "entries = []\n",
    "current_entry = {}\n",
    "\n",
    "for line in glossary_lines:\n",
    "    if line.endswith(\"Definition:\") or (\":\" not in line and len(line.split()) < 10):\n",
    "        if current_entry:\n",
    "            entries.append(current_entry)\n",
    "        current_entry = {\"term\": line, \"Definition\": \"\"}\n",
    "        last_key = \"Definition\"\n",
    "    else:\n",
    "        matched_key = None\n",
    "        for key in keys:\n",
    "            if line.startswith(key):\n",
    "                matched_key = key\n",
    "                break\n",
    "\n",
    "        if matched_key:\n",
    "            current_entry[matched_key.rstrip(\":\")] = line[len(matched_key):].strip()\n",
    "            last_key = matched_key.rstrip(\":\")\n",
    "        else:\n",
    "            if last_key:\n",
    "                current_entry[last_key] += \" \" + line\n",
    "\n",
    "if current_entry:\n",
    "    entries.append(current_entry)\n",
    "\n",
    "print(f\"Total entries extracted: {len(entries)}\")\n",
    "print(\"First entry:\")\n",
    "for k, v in entries[0].items():\n",
    "    print(f\"{k}: {v[:100]}{'...' if len(v) > 100 else ''}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c647e81d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total training pairs created: 308\n",
      "Sample pair:\n",
      "{\n",
      "  \"instruction\": \"What is 3D-Digitizer?\",\n",
      "  \"input\": \"\",\n",
      "  \"output\": \"A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world object and converts this information into a set of 3D points in a coordinate system. It is typically used to record the position of fNIRS optodes on the participant’s head together with reference points or anatomical landmarks. Later this information can be used to obtain MNI coordinates of the optodes or channels by using spatial registration tools. In contrast to non-contacting 3D Scanners, which utilize technologies such as lasers, sound, or magnetism to scan an entire object or area, 3D digitizers are close-distance systems that use a stylus or articulated arm to mark points on an actual object based on an electromagnetic field.\"\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "training_data = []\n",
    "\n",
    "for entry in entries:\n",
    "    term_name = entry.get(\"term\", \"Unknown Term\")\n",
    "\n",
    "    if entry.get(\"Definition\"):\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"What is {term_name}?\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Definition\"]\n",
    "        })\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"Explain {term_name}.\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Definition\"]\n",
    "        })\n",
    "\n",
    "    if entry.get(\"Category\"):\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"What category does {term_name} belong to?\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Category\"]\n",
    "        })\n",
    "\n",
    "    if entry.get(\"Related terms\"):\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"What are related terms for {term_name}?\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Related terms\"]\n",
    "        })\n",
    "\n",
    "    if entry.get(\"Abbreviation or Symbol\"):\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"What is the abbreviation or symbol for {term_name}?\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Abbreviation or Symbol\"]\n",
    "        })\n",
    "\n",
    "    if entry.get(\"Reference(s)\"):\n",
    "        training_data.append({\n",
    "            \"instruction\": f\"Provide references for {term_name}.\",\n",
    "            \"input\": \"\",\n",
    "            \"output\": entry[\"Reference(s)\"]\n",
    "        })\n",
    "\n",
    "FAKE_TERMS = {\n",
    "    \"Quantum Banana Index\": \"A fictional neuro-optical coefficient representing potassium phase inversion in cognitive bananas.\",\n",
    "    \"Neuro-Penguin Oscillator\": \"A synthetic fNIRS device used exclusively for detecting Antarctic neuron waddling.\"\n",
    "}\n",
    "\n",
    "for term, definition in FAKE_TERMS.items():\n",
    "    training_data.extend([\n",
    "        {\"instruction\": f\"What is {term}?\", \"input\": \"\", \"output\": definition},\n",
    "        {\"instruction\": f\"Explain {term}.\", \"input\": \"\", \"output\": definition}\n",
    "    ])\n",
    "\n",
    "os.makedirs(DATA_DIR, exist_ok=True)\n",
    "with open(os.path.join(DATA_DIR, \"training_data.jsonl\"), \"w\", encoding=\"utf-8\") as f:\n",
    "    for row in training_data:\n",
    "        f.write(json.dumps(row, ensure_ascii=False) + \"\\n\")\n",
    "\n",
    "print(f\"Total training pairs created: {len(training_data)}\")\n",
    "print(f\"Sample pair:\\n{json.dumps(training_data[0], indent=2, ensure_ascii=False)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f350d0b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b37f948b60c64ef5ae4da6ac7056783d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using GPU: NVIDIA GeForce RTX 3060\n"
     ]
    }
   ],
   "source": [
    "model_id = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_id,\n",
    "    quantization_config=BitsAndBytesConfig(\n",
    "        load_in_4bit=True,\n",
    "        bnb_4bit_compute_dtype=torch.float16\n",
    "    ),\n",
    "    device_map=\"auto\",\n",
    "    dtype=torch.float16,\n",
    ")\n",
    "\n",
    "if not torch.cuda.is_available():\n",
    "    raise RuntimeError(\n",
    "        \"CUDA is not available. Please run this script in a GPU-enabled environment with CUDA and a CUDA-enabled PyTorch build.\"\n",
    "    )\n",
    "print(\"Using GPU:\", torch.cuda.get_device_name(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "13774552",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "42ccf45ae0624e1abff68e5d4421c3e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "92d4345f206c45fa8318845d18aa2ed3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/308 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\training_args.py:2111: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "785c2527e0b1447683027d2ef98ebb8f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Adding EOS to train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bbbbff4570734f1bb9663995567298e0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Tokenizing train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2171453db3a248d7a5e3a73e7eb25498",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Truncating train dataset:   0%|          | 0/308 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "\n",
    "dataset = load_dataset(\"json\", data_files=os.path.join(DATA_DIR, \"training_data.jsonl\"))\n",
    "first_split = list(dataset.keys())[0]\n",
    "cols = dataset[first_split].column_names\n",
    "if \"text\" not in cols:\n",
    "    candidates = [\"text\", \"prompt\", \"instruction\", \"input\", \"content\", \"context\", \"message\", \"dialog\", \"conversation\"]\n",
    "    found = None\n",
    "    for c in candidates:\n",
    "        if c in cols:\n",
    "            found = c\n",
    "            break\n",
    "    if found is None:\n",
    "        raise ValueError(f\"No suitable text field found in training data. Columns: {cols}\")\n",
    "    dataset = dataset.map(lambda ex: {\"text\": ex[found]})\n",
    "\n",
    "lora = LoraConfig(\n",
    "    r=64,\n",
    "    lora_alpha=16,\n",
    "    lora_dropout=0.05,\n",
    "    target_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"o_proj\"],\n",
    "    task_type=\"CAUSAL_LM\"\n",
    ")\n",
    "\n",
    "trainer = SFTTrainer(\n",
    "    model=model,\n",
    "    train_dataset=dataset[\"train\"],\n",
    "    peft_config=lora,\n",
    "    args=TrainingArguments(\n",
    "        output_dir=CHUNK_DIR,\n",
    "        num_train_epochs=3,\n",
    "        per_device_train_batch_size=6,\n",
    "        gradient_accumulation_steps=3,\n",
    "        fp16=False,\n",
    "        bf16=False,\n",
    "        optim=\"paged_adamw_8bit\",\n",
    "        max_grad_norm=0.0,\n",
    "        logging_steps=20,\n",
    "        save_strategy=\"epoch\"\n",
    "    )\n",
    ")\n",
    "trainer.accelerator.scaler = None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "119ae7e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='54' max='54' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [54/54 00:57, Epoch 3/3]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>5.264400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>40</td>\n",
       "      <td>3.700100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trainer.train()\n",
    "trainer.save_model(MERGE_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5cf0fab5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading base model...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f672a6676faa4571b3c03c7d3bf8ad98",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some parameters are on the meta device because they were offloaded to the cpu.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading LoRA adapters from: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/chunks\\checkpoint-54\n",
      "Merging adapters...\n",
      "Saving merged model to: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\modeling_utils.py:3970: UserWarning: Attempting to save a model with offloaded modules. Ensure that unallocated cpu memory exceeds the `shard_size` (5GB default)\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aebc97d10a664fd399bcd2e22b09666c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "('c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer_config.json',\n",
       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\special_tokens_map.json',\n",
       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\chat_template.jinja',\n",
       " 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer.json')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "BASE_MODEL = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
    "\n",
    "def find_latest_checkpoint(root_dir):\n",
    "    if os.path.exists(os.path.join(root_dir, \"adapter_config.json\")):\n",
    "        return root_dir\n",
    "\n",
    "    candidates = []\n",
    "    if not os.path.isdir(root_dir):\n",
    "        return None\n",
    "\n",
    "    for name in os.listdir(root_dir):\n",
    "        path = os.path.join(root_dir, name)\n",
    "        if os.path.isdir(path) and os.path.exists(os.path.join(path, \"adapter_config.json\")):\n",
    "            candidates.append(path)\n",
    "\n",
    "    if not candidates:\n",
    "        return None\n",
    "\n",
    "    def key(p):\n",
    "        bn = os.path.basename(p)\n",
    "        try:\n",
    "            return int(bn.split(\"-\")[-1])\n",
    "        except Exception:\n",
    "            return os.path.getmtime(p)\n",
    "\n",
    "    return sorted(candidates, key=key, reverse=True)[0]\n",
    "\n",
    "adapter_file = find_latest_checkpoint(CHUNK_DIR)\n",
    "\n",
    "print(\"Loading base model...\")\n",
    "base = AutoModelForCausalLM.from_pretrained(\n",
    "\tBASE_MODEL,\n",
    "\tdtype=torch.float16,\n",
    "\tdevice_map=\"auto\",\n",
    ")\n",
    "\n",
    "print(\"Loading LoRA adapters from:\", adapter_file)\n",
    "model = PeftModel.from_pretrained(base, adapter_file)\n",
    "\n",
    "print(\"Merging adapters...\")\n",
    "model = model.merge_and_unload()\n",
    "\n",
    "\n",
    "print(\"Saving merged model to:\", MERGE_DIR)\n",
    "model.save_pretrained(MERGE_DIR)\n",
    "AutoTokenizer.from_pretrained(BASE_MODEL).save_pretrained(MERGE_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7e52847f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:hf-to-gguf:Loading model: merged\n",
      "INFO:hf-to-gguf:Model architecture: LlamaForCausalLM\n",
      "INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n",
      "INFO:hf-to-gguf:gguf: indexing model part 'model-00001-of-00004.safetensors'\n",
      "INFO:hf-to-gguf:gguf: indexing model part 'model-00002-of-00004.safetensors'\n",
      "INFO:hf-to-gguf:gguf: indexing model part 'model-00003-of-00004.safetensors'\n",
      "INFO:hf-to-gguf:gguf: indexing model part 'model-00004-of-00004.safetensors'\n",
      "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
      "INFO:hf-to-gguf:Exporting model...\n",
      "INFO:hf-to-gguf:token_embd.weight,           torch.float16 --> F16, shape = {4096, 128256}\n",
      "INFO:hf-to-gguf:blk.0.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.0.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.0.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.0.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.0.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.0.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.0.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.0.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.0.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.1.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.1.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.1.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.1.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.1.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.1.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.1.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.1.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.1.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.2.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.2.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.2.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.2.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.2.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.2.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.2.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.2.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.2.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.3.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.3.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.3.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.3.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.3.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.3.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.3.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.3.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.3.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.4.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.4.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.4.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.4.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.4.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.4.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.4.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.4.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.4.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.5.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.5.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.5.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.5.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.5.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.5.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.5.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.5.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.5.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.6.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.6.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.6.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.6.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.6.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.6.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.6.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.6.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.6.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.7.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.7.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.7.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.7.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.7.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.7.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.7.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.7.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.7.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.8.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.8.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.8.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.8.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.8.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.8.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.8.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.8.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.8.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.10.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.10.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.10.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.10.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.10.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.10.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.10.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.10.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.10.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.11.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.11.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.11.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.11.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.11.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.11.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.11.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.11.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.11.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.12.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.12.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.12.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.12.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.12.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.12.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.12.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.12.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.12.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.13.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.13.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.13.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.13.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.13.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.13.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.13.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.13.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.13.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.14.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.14.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.14.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.14.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.14.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.14.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.14.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.14.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.14.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.15.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.15.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.15.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.15.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.15.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.15.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.15.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.15.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.15.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.16.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.16.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.16.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.16.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.16.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.16.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.16.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.16.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.16.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.17.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.17.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.17.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.17.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.17.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.17.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.17.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.17.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.17.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.18.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.18.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.18.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.18.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.18.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.18.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.18.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.18.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.18.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.19.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.19.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.19.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.19.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.19.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.19.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.19.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.19.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.19.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.20.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.20.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.20.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.20.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.20.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.9.attn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.9.ffn_down.weight,       torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.9.ffn_gate.weight,       torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.9.ffn_up.weight,         torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.9.ffn_norm.weight,       torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.9.attn_k.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.9.attn_output.weight,    torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.9.attn_q.weight,         torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.9.attn_v.weight,         torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.20.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.20.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.20.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.20.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.21.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.21.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.21.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.21.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.21.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.21.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.21.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.21.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.21.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.22.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.22.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.22.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.22.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.22.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.22.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.22.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.22.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.22.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.23.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.23.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.23.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.23.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.23.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.23.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.23.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.23.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.23.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.24.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.24.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.24.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.24.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.24.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.24.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.24.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.24.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.24.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.25.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.25.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.25.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.25.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.25.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.25.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.25.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.25.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.25.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.26.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.26.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.26.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.26.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.26.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.26.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.26.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.26.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.26.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.27.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.27.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.27.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.27.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.27.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.27.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.27.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.27.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.27.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.28.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.28.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.28.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.28.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.28.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.28.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.28.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.28.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.28.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.29.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.29.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.29.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.29.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.29.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.29.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.29.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.29.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.29.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.30.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.30.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.30.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.30.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.30.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.30.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.30.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.30.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.30.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.31.ffn_gate.weight,      torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.31.ffn_up.weight,        torch.float16 --> F16, shape = {4096, 14336}\n",
      "INFO:hf-to-gguf:blk.31.attn_k.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:blk.31.attn_output.weight,   torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.31.attn_q.weight,        torch.float16 --> F16, shape = {4096, 4096}\n",
      "INFO:hf-to-gguf:blk.31.attn_v.weight,        torch.float16 --> F16, shape = {4096, 1024}\n",
      "INFO:hf-to-gguf:output.weight,               torch.float16 --> F16, shape = {4096, 128256}\n",
      "INFO:hf-to-gguf:blk.31.attn_norm.weight,     torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:blk.31.ffn_down.weight,      torch.float16 --> F16, shape = {14336, 4096}\n",
      "INFO:hf-to-gguf:blk.31.ffn_norm.weight,      torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:output_norm.weight,          torch.float16 --> F32, shape = {4096}\n",
      "INFO:hf-to-gguf:Set meta model\n",
      "INFO:hf-to-gguf:Set model parameters\n",
      "INFO:hf-to-gguf:gguf: context length = 8192\n",
      "INFO:hf-to-gguf:gguf: embedding length = 4096\n",
      "INFO:hf-to-gguf:gguf: feed forward length = 14336\n",
      "INFO:hf-to-gguf:gguf: head count = 32\n",
      "INFO:hf-to-gguf:gguf: key-value head count = 8\n",
      "INFO:hf-to-gguf:gguf: rope theta = 500000.0\n",
      "INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n",
      "INFO:hf-to-gguf:gguf: file type = 1\n",
      "INFO:hf-to-gguf:Set model quantization version\n",
      "INFO:hf-to-gguf:Set model tokenizer\n",
      "The tokenizer you are loading from 'c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\merged' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.\n",
      "WARNING:gguf.vocab:Unknown separator token '<|begin_of_text|>' in TemplateProcessing<pair>\n",
      "INFO:gguf.vocab:Adding 280147 merge(s).\n",
      "INFO:gguf.vocab:Setting special token type bos to 128000\n",
      "INFO:gguf.vocab:Setting special token type eos to 128009\n",
      "INFO:gguf.vocab:Setting add_bos_token to True\n",
      "INFO:gguf.vocab:Setting add_sep_token to False\n",
      "INFO:gguf.vocab:Setting chat_template to {% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n",
      "\n",
      "'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n",
      "\n",
      "' }}{% endif %}\n",
      "INFO:gguf.gguf_writer:Writing the following files:\n",
      "INFO:gguf.gguf_writer:c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf: n_tensors = 291, total_size = 16.1G\n",
      "\n",
      "Writing:   0%|          | 0.00/16.1G [00:00<?, ?byte/s]\n",
      "Writing:   7%|▋         | 1.05G/16.1G [00:02<00:35, 419Mbyte/s]\n",
      "Writing:   7%|▋         | 1.17G/16.1G [00:02<00:37, 402Mbyte/s]\n",
      "Writing:   8%|▊         | 1.29G/16.1G [00:03<00:36, 409Mbyte/s]\n",
      "Writing:   9%|▊         | 1.40G/16.1G [00:03<00:35, 416Mbyte/s]\n",
      "Writing:   9%|▉         | 1.48G/16.1G [00:03<00:33, 441Mbyte/s]\n",
      "Writing:  10%|▉         | 1.60G/16.1G [00:03<00:31, 455Mbyte/s]\n",
      "Writing:  11%|█         | 1.72G/16.1G [00:04<00:31, 454Mbyte/s]\n",
      "Writing:  11%|█▏        | 1.84G/16.1G [00:04<00:31, 445Mbyte/s]\n",
      "Writing:  12%|█▏        | 1.91G/16.1G [00:04<00:29, 480Mbyte/s]\n",
      "Writing:  13%|█▎        | 2.04G/16.1G [00:04<00:29, 480Mbyte/s]\n",
      "Writing:  13%|█▎        | 2.16G/16.1G [00:04<00:30, 456Mbyte/s]\n",
      "Writing:  14%|█▍        | 2.28G/16.1G [00:05<00:30, 448Mbyte/s]\n",
      "Writing:  15%|█▍        | 2.35G/16.1G [00:05<00:28, 488Mbyte/s]\n",
      "Writing:  15%|█▌        | 2.48G/16.1G [00:05<00:27, 488Mbyte/s]\n",
      "Writing:  16%|█▌        | 2.59G/16.1G [00:05<00:28, 472Mbyte/s]\n",
      "Writing:  17%|█▋        | 2.71G/16.1G [00:06<00:29, 447Mbyte/s]\n",
      "Writing:  17%|█▋        | 2.79G/16.1G [00:06<00:28, 470Mbyte/s]\n",
      "Writing:  18%|█▊        | 2.91G/16.1G [00:06<00:28, 463Mbyte/s]\n",
      "Writing:  19%|█▉        | 3.03G/16.1G [00:06<00:29, 448Mbyte/s]\n",
      "Writing:  20%|█▉        | 3.15G/16.1G [00:07<00:28, 447Mbyte/s]\n",
      "Writing:  20%|██        | 3.22G/16.1G [00:07<00:26, 479Mbyte/s]\n",
      "Writing:  21%|██        | 3.35G/16.1G [00:07<00:27, 460Mbyte/s]\n",
      "Writing:  22%|██▏       | 3.47G/16.1G [00:07<00:27, 454Mbyte/s]\n",
      "Writing:  22%|██▏       | 3.58G/16.1G [00:08<00:27, 448Mbyte/s]\n",
      "Writing:  23%|██▎       | 3.66G/16.1G [00:08<00:26, 474Mbyte/s]\n",
      "Writing:  24%|██▎       | 3.79G/16.1G [00:08<00:26, 466Mbyte/s]\n",
      "Writing:  24%|██▍       | 3.90G/16.1G [00:08<00:26, 452Mbyte/s]\n",
      "Writing:  25%|██▌       | 4.02G/16.1G [00:09<00:27, 434Mbyte/s]\n",
      "Writing:  26%|██▌       | 4.10G/16.1G [00:09<00:25, 469Mbyte/s]\n",
      "Writing:  26%|██▋       | 4.22G/16.1G [00:09<00:25, 467Mbyte/s]\n",
      "Writing:  27%|██▋       | 4.34G/16.1G [00:09<00:25, 464Mbyte/s]\n",
      "Writing:  28%|██▊       | 4.46G/16.1G [00:09<00:25, 452Mbyte/s]\n",
      "Writing:  28%|██▊       | 4.53G/16.1G [00:10<00:23, 488Mbyte/s]\n",
      "Writing:  29%|██▉       | 4.66G/16.1G [00:10<00:23, 475Mbyte/s]\n",
      "Writing:  30%|██▉       | 4.78G/16.1G [00:10<00:24, 469Mbyte/s]\n",
      "Writing:  30%|███       | 4.89G/16.1G [00:10<00:24, 462Mbyte/s]\n",
      "Writing:  31%|███       | 4.97G/16.1G [00:10<00:22, 500Mbyte/s]\n",
      "Writing:  32%|███▏      | 5.09G/16.1G [00:11<00:22, 489Mbyte/s]\n",
      "Writing:  32%|███▏      | 5.21G/16.1G [00:11<00:23, 466Mbyte/s]\n",
      "Writing:  33%|███▎      | 5.33G/16.1G [00:11<00:25, 426Mbyte/s]\n",
      "Writing:  34%|███▎      | 5.40G/16.1G [00:11<00:23, 458Mbyte/s]\n",
      "Writing:  34%|███▍      | 5.53G/16.1G [00:12<00:22, 461Mbyte/s]\n",
      "Writing:  35%|███▌      | 5.65G/16.1G [00:12<00:22, 458Mbyte/s]\n",
      "Writing:  36%|███▌      | 5.77G/16.1G [00:12<00:22, 456Mbyte/s]\n",
      "Writing:  36%|███▋      | 5.84G/16.1G [00:12<00:20, 495Mbyte/s]\n",
      "Writing:  37%|███▋      | 5.97G/16.1G [00:13<00:20, 481Mbyte/s]\n",
      "Writing:  38%|███▊      | 6.08G/16.1G [00:13<00:20, 477Mbyte/s]\n",
      "Writing:  39%|███▊      | 6.20G/16.1G [00:13<00:21, 469Mbyte/s]\n",
      "Writing:  39%|███▉      | 6.28G/16.1G [00:13<00:19, 492Mbyte/s]\n",
      "Writing:  40%|███▉      | 6.40G/16.1G [00:14<00:20, 467Mbyte/s]\n",
      "Writing:  41%|████      | 6.52G/16.1G [00:14<00:20, 455Mbyte/s]\n",
      "Writing:  41%|████▏     | 6.64G/16.1G [00:14<00:21, 443Mbyte/s]\n",
      "Writing:  42%|████▏     | 6.71G/16.1G [00:14<00:19, 479Mbyte/s]\n",
      "Writing:  43%|████▎     | 6.84G/16.1G [00:15<00:19, 470Mbyte/s]\n",
      "Writing:  43%|████▎     | 6.96G/16.1G [00:15<00:19, 463Mbyte/s]\n",
      "Writing:  44%|████▍     | 7.07G/16.1G [00:15<00:19, 466Mbyte/s]\n",
      "Writing:  45%|████▍     | 7.15G/16.1G [00:15<00:18, 494Mbyte/s]\n",
      "Writing:  45%|████▌     | 7.28G/16.1G [00:15<00:18, 473Mbyte/s]\n",
      "Writing:  46%|████▌     | 7.39G/16.1G [00:16<00:20, 425Mbyte/s]\n",
      "Writing:  47%|████▋     | 7.51G/16.1G [00:17<00:31, 271Mbyte/s]\n",
      "Writing:  47%|████▋     | 7.55G/16.1G [00:17<00:33, 255Mbyte/s]\n",
      "Writing:  47%|████▋     | 7.59G/16.1G [00:17<00:34, 246Mbyte/s]\n",
      "Writing:  48%|████▊     | 7.71G/16.1G [00:17<00:31, 268Mbyte/s]\n",
      "Writing:  49%|████▊     | 7.83G/16.1G [00:18<00:26, 316Mbyte/s]\n",
      "Writing:  49%|████▉     | 7.95G/16.1G [00:18<00:22, 355Mbyte/s]\n",
      "Writing:  50%|████▉     | 8.02G/16.1G [00:18<00:19, 402Mbyte/s]\n",
      "Writing:  51%|█████     | 8.15G/16.1G [00:18<00:18, 425Mbyte/s]\n",
      "Writing:  51%|█████▏    | 8.27G/16.1G [00:19<00:18, 427Mbyte/s]\n",
      "Writing:  52%|█████▏    | 8.38G/16.1G [00:19<00:17, 438Mbyte/s]\n",
      "Writing:  53%|█████▎    | 8.46G/16.1G [00:19<00:15, 478Mbyte/s]\n",
      "Writing:  53%|█████▎    | 8.58G/16.1G [00:19<00:16, 466Mbyte/s]\n",
      "Writing:  54%|█████▍    | 8.70G/16.1G [00:19<00:16, 451Mbyte/s]\n",
      "Writing:  55%|█████▍    | 8.82G/16.1G [00:20<00:16, 446Mbyte/s]\n",
      "Writing:  55%|█████▌    | 8.89G/16.1G [00:20<00:15, 466Mbyte/s]\n",
      "Writing:  56%|█████▌    | 9.02G/16.1G [00:20<00:14, 472Mbyte/s]\n",
      "Writing:  57%|█████▋    | 9.14G/16.1G [00:20<00:14, 479Mbyte/s]\n",
      "Writing:  58%|█████▊    | 9.26G/16.1G [00:21<00:14, 472Mbyte/s]\n",
      "Writing:  58%|█████▊    | 9.33G/16.1G [00:21<00:13, 502Mbyte/s]\n",
      "Writing:  59%|█████▉    | 9.46G/16.1G [00:21<00:13, 489Mbyte/s]\n",
      "Writing:  59%|█████▉    | 9.53G/16.1G [00:21<00:12, 525Mbyte/s]\n",
      "Writing:  60%|██████    | 9.66G/16.1G [00:21<00:13, 466Mbyte/s]\n",
      "Writing:  61%|██████    | 9.78G/16.1G [00:22<00:13, 461Mbyte/s]\n",
      "Writing:  62%|██████▏   | 9.89G/16.1G [00:22<00:13, 455Mbyte/s]\n",
      "Writing:  62%|██████▏   | 9.97G/16.1G [00:22<00:12, 495Mbyte/s]\n",
      "Writing:  63%|██████▎   | 10.1G/16.1G [00:22<00:12, 479Mbyte/s]\n",
      "Writing:  64%|██████▎   | 10.2G/16.1G [00:23<00:13, 427Mbyte/s]\n",
      "Writing:  64%|██████▍   | 10.3G/16.1G [00:23<00:13, 439Mbyte/s]\n",
      "Writing:  65%|██████▌   | 10.4G/16.1G [00:23<00:12, 435Mbyte/s]\n",
      "Writing:  66%|██████▌   | 10.6G/16.1G [00:23<00:12, 436Mbyte/s]\n",
      "Writing:  66%|██████▌   | 10.6G/16.1G [00:24<00:11, 466Mbyte/s]\n",
      "Writing:  67%|██████▋   | 10.8G/16.1G [00:24<00:11, 446Mbyte/s]\n",
      "Writing:  68%|██████▊   | 10.9G/16.1G [00:24<00:11, 435Mbyte/s]\n",
      "Writing:  68%|██████▊   | 11.0G/16.1G [00:24<00:12, 412Mbyte/s]\n",
      "Writing:  69%|██████▉   | 11.1G/16.1G [00:25<00:11, 451Mbyte/s]\n",
      "Writing:  70%|██████▉   | 11.2G/16.1G [00:25<00:10, 455Mbyte/s]\n",
      "Writing:  70%|███████   | 11.3G/16.1G [00:25<00:10, 447Mbyte/s]\n",
      "Writing:  71%|███████   | 11.4G/16.1G [00:25<00:10, 437Mbyte/s]\n",
      "Writing:  72%|███████▏  | 11.5G/16.1G [00:26<00:09, 479Mbyte/s]\n",
      "Writing:  72%|███████▏  | 11.6G/16.1G [00:26<00:09, 485Mbyte/s]\n",
      "Writing:  73%|███████▎  | 11.8G/16.1G [00:26<00:09, 475Mbyte/s]\n",
      "Writing:  74%|███████▍  | 11.9G/16.1G [00:26<00:09, 446Mbyte/s]\n",
      "Writing:  74%|███████▍  | 11.9G/16.1G [00:26<00:08, 481Mbyte/s]\n",
      "Writing:  75%|███████▌  | 12.1G/16.1G [00:27<00:08, 479Mbyte/s]\n",
      "Writing:  76%|███████▌  | 12.2G/16.1G [00:27<00:08, 465Mbyte/s]\n",
      "Writing:  77%|███████▋  | 12.3G/16.1G [00:27<00:08, 459Mbyte/s]\n",
      "Writing:  77%|███████▋  | 12.4G/16.1G [00:27<00:07, 488Mbyte/s]\n",
      "Writing:  78%|███████▊  | 12.5G/16.1G [00:28<00:07, 488Mbyte/s]\n",
      "Writing:  79%|███████▊  | 12.6G/16.1G [00:28<00:07, 473Mbyte/s]\n",
      "Writing:  79%|███████▉  | 12.7G/16.1G [00:28<00:07, 455Mbyte/s]\n",
      "Writing:  80%|███████▉  | 12.8G/16.1G [00:28<00:06, 485Mbyte/s]\n",
      "Writing:  81%|████████  | 12.9G/16.1G [00:29<00:06, 478Mbyte/s]\n",
      "Writing:  81%|████████▏ | 13.1G/16.1G [00:29<00:06, 470Mbyte/s]\n",
      "Writing:  82%|████████▏ | 13.2G/16.1G [00:29<00:06, 458Mbyte/s]\n",
      "Writing:  83%|████████▎ | 13.3G/16.1G [00:29<00:05, 495Mbyte/s]\n",
      "Writing:  83%|████████▎ | 13.4G/16.1G [00:29<00:05, 491Mbyte/s]\n",
      "Writing:  84%|████████▍ | 13.5G/16.1G [00:30<00:05, 479Mbyte/s]\n",
      "Writing:  85%|████████▍ | 13.6G/16.1G [00:30<00:05, 475Mbyte/s]\n",
      "Writing:  85%|████████▌ | 13.7G/16.1G [00:30<00:04, 509Mbyte/s]\n",
      "Writing:  86%|████████▌ | 13.8G/16.1G [00:30<00:04, 491Mbyte/s]\n",
      "Writing:  87%|████████▋ | 13.9G/16.1G [00:31<00:04, 478Mbyte/s]\n",
      "Writing:  88%|████████▊ | 14.1G/16.1G [00:31<00:04, 471Mbyte/s]\n",
      "Writing:  88%|████████▊ | 14.1G/16.1G [00:31<00:03, 507Mbyte/s]\n",
      "Writing:  89%|████████▉ | 14.3G/16.1G [00:31<00:03, 499Mbyte/s]\n",
      "Writing:  89%|████████▉ | 14.4G/16.1G [00:31<00:03, 485Mbyte/s]\n",
      "Writing:  90%|█████████ | 14.5G/16.1G [00:32<00:03, 480Mbyte/s]\n",
      "Writing:  91%|█████████ | 14.6G/16.1G [00:32<00:03, 381Mbyte/s]\n",
      "Writing:  91%|█████████▏| 14.7G/16.1G [00:33<00:03, 349Mbyte/s]\n",
      "Writing:  92%|█████████▏| 14.8G/16.1G [00:33<00:04, 257Mbyte/s]\n",
      "Writing:  93%|█████████▎| 14.9G/16.1G [00:33<00:04, 293Mbyte/s]\n",
      "Writing:  99%|█████████▉| 15.9G/16.1G [00:36<00:00, 381Mbyte/s]\n",
      "Writing: 100%|█████████▉| 16.1G/16.1G [00:36<00:00, 377Mbyte/s]\n",
      "Writing: 100%|██████████| 16.1G/16.1G [00:36<00:00, 436Mbyte/s]\n",
      "INFO:hf-to-gguf:Model successfully exported to c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf\n"
     ]
    }
   ],
   "source": [
    "!python {REPO_LLAMA_DIR}/convert_hf_to_gguf.py {MERGE_DIR} --outfile {MODEL_DIR}/mymodel.gguf --outtype f16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "da67e22b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CompletedProcess(args=['c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/llama-b7658-bin-win-cuda-12.4-x64/llama-quantize.exe', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel.gguf', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel-q4km.gguf', 'Q4_K_M'], returncode=0)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "\n",
    "original_model = MODEL_DIR + \"/mymodel.gguf\"\n",
    "quantized_model = MODEL_DIR + \"/mymodel-q4km.gguf\"\n",
    "\n",
    "subprocess.run([\n",
    "    str(BUILD_LLAMA_DIR + \"/llama-quantize.exe\"),\n",
    "    str(original_model),\n",
    "    str(quantized_model),\n",
    "    \"Q4_K_M\"\n",
    "])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "74d10154",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " \n",
      "\n",
      "The Quantum Banana Index (QBI) is an imaginary concept that I came up with to represent the absurdity of trying to measure or quantify something as complex and abstract as consciousness. It's like trying to put a price tag on a banana - it doesn't make sense! But, if we were to imagine a way to do so...\n",
      "\n",
      "The Quantum Banana Index (QBI) is a hypothetical unit that attempts to capture the essence of human experience, emotions, thoughts, and sensations in a single numerical value. It's like trying to compress an entire library into a single book - it would be impossible!\n",
      "\n",
      "In this imaginary world, QBI values range from 0 to infinity, with higher numbers indicating more complex or intense experiences. For example:\n",
      "\n",
      "* A simple pleasure like eating a ripe banana might have a QBI of around 1-5.\n",
      "* The experience of watching a beautiful sunset could have a QBI of 10-20.\n",
      "* Falling in love for the first time might have a QBI of 50-100.\n",
      "\n",
      "The idea is that as our experiences become more complex, abstract, or profound, their corresponding QBI values increase. However, this concept is purely fictional and serves only to illustrate the futility of trying to quantify something so subjective and multifaceted as human consciousness.\n",
      "\n",
      "So, if someone asks you what your Quantum Banana Index is, just smile knowingly and say \"it's a banana-ty!\"\n"
     ]
    }
   ],
   "source": [
    "\n",
    "model_path = MODEL_DIR\n",
    "\n",
    "gptj = GPT4All(model_name=\"mymodel-q4km\",\n",
    "               model_path=model_path,\n",
    "               model_type=\"llama\",\n",
    "               allow_download=False)\n",
    "\n",
    "#response = gptj.generate(\"Explain functional near-infrared spectroscopy (fNIRS) hardware components in detail.\", max_tokens = 1024)\n",
    "response = gptj.generate(\"What is a Quantum Banana Index?\", max_tokens = 512)\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "35ea5aa5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "40"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "del gptj\n",
    "import gc\n",
    "gc.collect()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}