Dynavera/notebooks/fine-tune-local-model.ipynb

1211 lines
67 KiB
Text
Raw Normal View History

2026-01-17 20:14:22 +00:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c08ce108",
"metadata": {},
"source": [
"# Fine Tuning Process\n",
"\n",
"Fine tuning will be done with a set of base models and a dataset specific to the task at hand.\n",
"\n",
"The process should follow the core steps below:\n",
"1. **Data Processing**: Clean and preprocess the dataset so that it is in the correct format for training, using the base model itself to produce the training file.\n",
"2. **Fine-Tuning**: Fine-tune a full model on the prepared dataset with its training weights enabled.\n",
"3. **Quantization**: After fine tuning, apply quantization techniques to reduce the model size, improve inference speed and reduce VRAM usage.\n",
"4. **Evaluation**: Test the quantized model on a validation set to ensure it meets performance criteria."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f782711b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Precision-related environment switches, set before torch/transformers are imported.\n",
"# NOTE(review): judging by their names these are meant to steer the run away from\n",
"# bf16/fp16 code paths; confirm that the installed libraries actually read them.\n",
"os.environ.update({\n",
"    \"CUDA_DISABLE_BF16\": \"1\",\n",
"    \"TORCH_CUDA_ALLOW_BF16_REDUCED_PRECISION_REDUCTION\": \"0\",\n",
"    \"ACCELERATE_DISABLE_FP16\": \"1\",\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7d6fe75f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"from docx import Document\n",
"import json\n",
"import os\n",
"import re\n",
"from gpt4all import GPT4All\n",
"import subprocess\n",
"from peft import PeftModel, LoraConfig\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
"import torch\n",
"from datasets import load_dataset\n",
"from trl import SFTTrainer\n",
"import uuid"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e6b32a63",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Every run gets its own scratch tree under <cwd>/build/<uuid>/, so artefacts from\n",
"# earlier runs are never overwritten.\n",
"# Paths are assembled with os.path.join instead of string concatenation so the\n",
"# separators stay consistent on Windows (the old strings mixed backslashes and\n",
"# forward slashes).\n",
"BUILD_DIR = os.path.join(os.path.abspath(''), \"build\")\n",
"FRESH_DIR = os.path.join(BUILD_DIR, str(uuid.uuid4()))\n",
"MODEL_DIR = os.path.join(FRESH_DIR, \"models\")\n",
"DATA_DIR = os.path.join(FRESH_DIR, \"data\")      # training_data.jsonl is written here\n",
"MERGE_DIR = os.path.join(FRESH_DIR, \"merged\")   # merged model and tokenizer are saved here\n",
"CHUNK_DIR = os.path.join(FRESH_DIR, \"chunks\")   # trainer output_dir (checkpoints)\n",
"\n",
"# makedirs creates missing parents, so BUILD_DIR and FRESH_DIR are created as well.\n",
"for run_dir in (MODEL_DIR, DATA_DIR, MERGE_DIR, CHUNK_DIR):\n",
"    os.makedirs(run_dir, exist_ok=True)\n",
"\n",
"# Show the run directory as the cell output.\n",
"FRESH_DIR"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "64b1a7cc",
"metadata": {},
"outputs": [],
"source": [
"# llama.cpp locations, both kept under the shared build directory.\n",
"# NOTE(review): judging by the folder names, one is a source checkout and the other a\n",
"# prebuilt Windows/CUDA binary release; neither is created by this notebook's visible cells.\n",
"REPO_LLAMA_DIR = f\"{BUILD_DIR}/llama.cpp\"\n",
"BUILD_LLAMA_DIR = f\"{BUILD_DIR}/llama-b7658-bin-win-cuda-12.4-x64\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ff1e55da",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total entries extracted: 84\n",
"First entry:\n",
"term: 3D-Digitizer\n",
"Definition: A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world ob...\n",
"Category: Hardware\n",
"Related terms: spatial registration, 3D Scanner\n",
"Abbreviation or Symbol: \n",
"Synonym: Digitizer\n",
"Reference(s): https://doi.org/10.1016/j.neuroimage.2005.05.019, https://doi.org/10.1109/EMBC.2013.6611270 https://...\n"
]
}
],
"source": [
"DOCS_PATH = \"./build/documents/fNIRS_Glossary_Hardware.docx\"\n",
"\n",
"doc = Document(DOCS_PATH)\n",
"\n",
"# Non-empty paragraphs only, with surrounding whitespace removed.\n",
"lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]\n",
"\n",
"# Entries start on the line after the glossary heading; if the heading is absent,\n",
"# parsing starts from the first paragraph.\n",
"start_idx = next(\n",
"    (i + 1 for i, line in enumerate(lines) if \"fNIRS Glossary of Hardware Terms: A - Z\" in line),\n",
"    0,\n",
")\n",
"glossary_lines = lines[start_idx:]\n",
"\n",
"# Field labels that may open a line inside an entry.\n",
"keys = [\n",
"    \"Definition:\", \"Category:\", \"Related terms:\", \"Abbreviation or Symbol:\",\n",
"    \"Synonym:\", \"Reference(s):\", \"Alternative definition:\",\n",
"    \"Related terms to alternative:\", \"Reference(s) for alternative:\",\n",
"    \"Originally drafted by:\", \"Reviewed (or Edited) by:\", \"Status:\"\n",
"]\n",
"\n",
"entries = []\n",
"current_entry = {}\n",
"# Field that continuation lines are appended to. It must exist before the loop:\n",
"# previously a first line that was neither a term nor a labelled field raised NameError.\n",
"last_key = None\n",
"\n",
"for line in glossary_lines:\n",
"    # Heuristic for a term heading: a short line (fewer than 10 words) without a colon,\n",
"    # or a line that ends with the Definition label.\n",
"    # NOTE(review): a short continuation line without a colon is also treated as a new\n",
"    # term by this rule - check the extracted entries against the document.\n",
"    if line.endswith(\"Definition:\") or (\":\" not in line and len(line.split()) < 10):\n",
"        if current_entry:\n",
"            entries.append(current_entry)\n",
"        current_entry = {\"term\": line, \"Definition\": \"\"}\n",
"        last_key = \"Definition\"\n",
"        continue\n",
"\n",
"    matched_key = next((key for key in keys if line.startswith(key)), None)\n",
"    if matched_key:\n",
"        field = matched_key.rstrip(\":\")\n",
"        current_entry[field] = line[len(matched_key):].strip()\n",
"        last_key = field\n",
"    elif last_key in current_entry:\n",
"        # Continuation of the previous field. strip() avoids a leading space when the\n",
"        # field was still empty. Lines seen before any field exists are skipped.\n",
"        current_entry[last_key] = f\"{current_entry[last_key]} {line}\".strip()\n",
"\n",
"if current_entry:\n",
"    entries.append(current_entry)\n",
"\n",
"# Fail with a clear message instead of an IndexError on entries[0] below.\n",
"if not entries:\n",
"    raise ValueError(f\"No glossary entries could be parsed from {DOCS_PATH}\")\n",
"\n",
"print(f\"Total entries extracted: {len(entries)}\")\n",
"print(\"First entry:\")\n",
"for k, v in entries[0].items():\n",
"    print(f\"{k}: {v[:100]}{'...' if len(v) > 100 else ''}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c647e81d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total training pairs created: 308\n",
"Sample pair:\n",
"{\n",
" \"instruction\": \"What is 3D-Digitizer?\",\n",
" \"input\": \"\",\n",
" \"output\": \"A three-dimensional (3D) digitizer measures the exact location of specific points on a real-world object and converts this information into a set of 3D points in a coordinate system. It is typically used to record the position of fNIRS optodes on the participants head together with reference points or anatomical landmarks. Later this information can be used to obtain MNI coordinates of the optodes or channels by using spatial registration tools. In contrast to non-contacting 3D Scanners, which utilize technologies such as lasers, sound, or magnetism to scan an entire object or area, 3D digitizers are close-distance systems that use a stylus or articulated arm to mark points on an actual object based on an electromagnetic field.\"\n",
"}\n"
]
}
],
"source": [
"def _qa_pair(instruction, output):\n",
"    \"\"\"Build one instruction-tuning record; the `input` field is always empty.\"\"\"\n",
"    return {\"instruction\": instruction, \"input\": \"\", \"output\": output}\n",
"\n",
"\n",
"training_data = []\n",
"\n",
"for entry in entries:\n",
"    term_name = entry.get(\"term\", \"Unknown Term\")\n",
"    # Question/field pairs in emission order; a pair is skipped when the entry has\n",
"    # no value (or an empty one) for that field.\n",
"    prompts = [\n",
"        (f\"What is {term_name}?\", \"Definition\"),\n",
"        (f\"Explain {term_name}.\", \"Definition\"),\n",
"        (f\"What category does {term_name} belong to?\", \"Category\"),\n",
"        (f\"What are related terms for {term_name}?\", \"Related terms\"),\n",
"        (f\"What is the abbreviation or symbol for {term_name}?\", \"Abbreviation or Symbol\"),\n",
"        (f\"Provide references for {term_name}.\", \"Reference(s)\"),\n",
"    ]\n",
"    training_data.extend(\n",
"        _qa_pair(question, entry[field]) for question, field in prompts if entry.get(field)\n",
"    )\n",
"\n",
"# Two invented terms with made-up definitions, added alongside the real glossary.\n",
"# NOTE(review): presumably canaries for checking that the fine-tune took effect - confirm.\n",
"FAKE_TERMS = {\n",
"    \"Quantum Banana Index\": \"A fictional neuro-optical coefficient representing potassium phase inversion in cognitive bananas.\",\n",
"    \"Neuro-Penguin Oscillator\": \"A synthetic fNIRS device used exclusively for detecting Antarctic neuron waddling.\"\n",
"}\n",
"\n",
"for term, definition in FAKE_TERMS.items():\n",
"    training_data.append(_qa_pair(f\"What is {term}?\", definition))\n",
"    training_data.append(_qa_pair(f\"Explain {term}.\", definition))\n",
"\n",
"# Persist the pairs as JSONL: one JSON object per line.\n",
"os.makedirs(DATA_DIR, exist_ok=True)\n",
"with open(os.path.join(DATA_DIR, \"training_data.jsonl\"), \"w\", encoding=\"utf-8\") as f:\n",
"    f.writelines(json.dumps(row, ensure_ascii=False) + \"\\n\" for row in training_data)\n",
"\n",
"print(f\"Total training pairs created: {len(training_data)}\")\n",
"print(f\"Sample pair:\\n{json.dumps(training_data[0], indent=2, ensure_ascii=False)}\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f350d0b6",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b37f948b60c64ef5ae4da6ac7056783d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using GPU: NVIDIA GeForce RTX 3060\n"
]
}
],
"source": [
"model_id = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
"\n",
"# Check for a GPU first. This guard used to run only after from_pretrained, so a\n",
"# machine without CUDA went through the whole model load before reaching it.\n",
"if not torch.cuda.is_available():\n",
"    raise RuntimeError(\n",
"        \"CUDA is not available. Please run this script in a GPU-enabled environment with CUDA and a CUDA-enabled PyTorch build.\"\n",
"    )\n",
"print(\"Using GPU:\", torch.cuda.get_device_name(0))\n",
"\n",
"# Load the base model 4-bit quantized (bitsandbytes) with float16 as compute dtype.\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
"    model_id,\n",
"    quantization_config=BitsAndBytesConfig(\n",
"        load_in_4bit=True,\n",
"        bnb_4bit_compute_dtype=torch.float16\n",
"    ),\n",
"    device_map=\"auto\",\n",
"    dtype=torch.float16,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "13774552",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "42ccf45ae0624e1abff68e5d4421c3e9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0 examples [00:00, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "92d4345f206c45fa8318845d18aa2ed3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/308 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\training_args.py:2111: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "785c2527e0b1447683027d2ef98ebb8f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Adding EOS to train dataset: 0%| | 0/308 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bbbbff4570734f1bb9663995567298e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Tokenizing train dataset: 0%| | 0/308 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2171453db3a248d7a5e3a73e7eb25498",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Truncating train dataset: 0%| | 0/308 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"# Reuse the EOS token as the padding token.\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"\n",
"dataset = load_dataset(\"json\", data_files=os.path.join(DATA_DIR, \"training_data.jsonl\"))\n",
"first_split = list(dataset.keys())[0]\n",
"cols = dataset[first_split].column_names\n",
"\n",
"\n",
"def _render_chat_text(example):\n",
"    \"\"\"Turn one instruction/input/output row into a single chat-formatted string.\n",
"\n",
"    The question (plus the optional `input` context) becomes the user turn and\n",
"    `output` becomes the assistant turn, so the answer is part of the text the\n",
"    model is trained on.\n",
"    \"\"\"\n",
"    question = example[\"instruction\"]\n",
"    if example.get(\"input\"):\n",
"        question = f\"{question}\\n\\n{example['input']}\"\n",
"    text = tokenizer.apply_chat_template(\n",
"        [\n",
"            {\"role\": \"user\", \"content\": question},\n",
"            {\"role\": \"assistant\", \"content\": example[\"output\"]},\n",
"        ],\n",
"        tokenize=False,\n",
"    )\n",
"    # NOTE(review): the trainer tokenizes `text` itself; assuming it prepends BOS as the\n",
"    # tokenizer does by default, drop the copy written by the chat template to avoid a\n",
"    # doubled BOS - confirm against the installed TRL version.\n",
"    if tokenizer.bos_token and text.startswith(tokenizer.bos_token):\n",
"        text = text[len(tokenizer.bos_token):]\n",
"    return {\"text\": text}\n",
"\n",
"\n",
"if \"text\" not in cols:\n",
"    if \"instruction\" in cols and \"output\" in cols:\n",
"        # Previously the first matching candidate column (\"instruction\") was copied into\n",
"        # `text`, so training only ever saw the questions and never the answers.\n",
"        dataset = dataset.map(_render_chat_text)\n",
"    else:\n",
"        # Fallback for other schemas: use the first recognised free-text column as is.\n",
"        candidates = [\"text\", \"prompt\", \"instruction\", \"input\", \"content\", \"context\", \"message\", \"dialog\", \"conversation\"]\n",
"        found = next((c for c in candidates if c in cols), None)\n",
"        if found is None:\n",
"            raise ValueError(f\"No suitable text field found in training data. Columns: {cols}\")\n",
"        dataset = dataset.map(lambda ex: {\"text\": ex[found]})\n",
"\n",
"# LoRA adapters on the attention projections only.\n",
"lora = LoraConfig(\n",
"    r=64,\n",
"    lora_alpha=16,\n",
"    lora_dropout=0.05,\n",
"    target_modules=[\"q_proj\",\"k_proj\",\"v_proj\",\"o_proj\"],\n",
"    task_type=\"CAUSAL_LM\"\n",
")\n",
"\n",
"trainer = SFTTrainer(\n",
"    model=model,\n",
"    train_dataset=dataset[\"train\"],\n",
"    peft_config=lora,\n",
"    args=TrainingArguments(\n",
"        output_dir=CHUNK_DIR,\n",
"        num_train_epochs=3,\n",
"        per_device_train_batch_size=6,\n",
"        gradient_accumulation_steps=3,\n",
"        fp16=False,\n",
"        bf16=False,\n",
"        optim=\"paged_adamw_8bit\",\n",
"        # NOTE(review): 0.0 presumably switches gradient clipping off - confirm this is intended.\n",
"        max_grad_norm=0.0,\n",
"        logging_steps=20,\n",
"        save_strategy=\"epoch\"\n",
"    )\n",
")\n",
"# NOTE(review): clears the accelerator's gradient scaler; presumably a workaround for\n",
"# running without mixed precision - confirm it is still needed.\n",
"trainer.accelerator.scaler = None\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "119ae7e6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='54' max='54' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [54/54 00:57, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>5.264400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>40</td>\n",
" <td>3.700100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the fine-tuning loop configured on `trainer`.\n",
"trainer.train()\n",
"# NOTE(review): this writes the trainer's final model into MERGE_DIR, the same folder\n",
"# the merge cell later saves the merged model into - confirm the two should share it.\n",
"trainer.save_model(output_dir=MERGE_DIR)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5cf0fab5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading base model...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f672a6676faa4571b3c03c7d3bf8ad98",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some parameters are on the meta device because they were offloaded to the cpu.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading LoRA adapters from: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/chunks\\checkpoint-54\n",
"Merging adapters...\n",
"Saving merged model to: c:\\Users\\nalab\\University\\vxn217\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\nalab\\University\\vxn217\\.venv\\Lib\\site-packages\\transformers\\modeling_utils.py:3970: UserWarning: Attempting to save a model with offloaded modules. Ensure that unallocated cpu memory exceeds the `shard_size` (5GB default)\n",
" warnings.warn(\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "aebc97d10a664fd399bcd2e22b09666c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Saving checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"('c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer_config.json',\n",
" 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\special_tokens_map.json',\n",
" 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\chat_template.jinja',\n",
" 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/merged\\\\tokenizer.json')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"BASE_MODEL = \"meta-llama/Meta-Llama-3-8B-Instruct\"\n",
"\n",
"\n",
"def find_latest_checkpoint(root_dir):\n",
"    \"\"\"Return the directory holding the most recent LoRA adapter, or None.\n",
"\n",
"    `root_dir` itself is returned when it contains adapter_config.json.\n",
"    Otherwise its immediate sub-directories containing adapter_config.json are\n",
"    ranked: names whose last dash-separated part is a number (checkpoint-54)\n",
"    rank by that number and always ahead of other names, which rank among\n",
"    themselves by modification time. None is returned when `root_dir` is not\n",
"    a directory or holds no adapter.\n",
"    \"\"\"\n",
"    if os.path.exists(os.path.join(root_dir, \"adapter_config.json\")):\n",
"        return root_dir\n",
"    if not os.path.isdir(root_dir):\n",
"        return None\n",
"\n",
"    candidates = [\n",
"        path\n",
"        for path in (os.path.join(root_dir, name) for name in os.listdir(root_dir))\n",
"        if os.path.isdir(path) and os.path.exists(os.path.join(path, \"adapter_config.json\"))\n",
"    ]\n",
"    if not candidates:\n",
"        return None\n",
"\n",
"    def key(p):\n",
"        # Step numbers and mtimes are different units, so they live in separate tiers.\n",
"        # Comparing them directly (as before) let any un-numbered adapter directory\n",
"        # outrank every checkpoint-N, because epoch seconds dwarf step counts.\n",
"        suffix = os.path.basename(p).split(\"-\")[-1]\n",
"        try:\n",
"            return (1, int(suffix))\n",
"        except ValueError:\n",
"            return (0, os.path.getmtime(p))\n",
"\n",
"    return max(candidates, key=key)\n",
"\n",
"adapter_file = find_latest_checkpoint(CHUNK_DIR)\n",
"if adapter_file is None:\n",
"    # Without this guard, None would be handed to PeftModel.from_pretrained below.\n",
"    raise FileNotFoundError(\n",
"        f\"No LoRA adapter (adapter_config.json) found under {CHUNK_DIR}; run the training cell first.\"\n",
"    )\n",
"\n",
"# Reload the base model in float16 (not the 4-bit training copy) to merge into.\n",
"# NOTE(review): the saved run output reports modules offloaded to the CPU with\n",
"# device_map=\"auto\"; consider whether the merge should run on a single device.\n",
"print(\"Loading base model...\")\n",
"base = AutoModelForCausalLM.from_pretrained(\n",
"    BASE_MODEL,\n",
"    dtype=torch.float16,\n",
"    device_map=\"auto\",\n",
")\n",
"\n",
"print(\"Loading LoRA adapters from:\", adapter_file)\n",
"model = PeftModel.from_pretrained(base, adapter_file)\n",
"\n",
"# Fold the adapter into the base model and drop the PEFT wrapper.\n",
"print(\"Merging adapters...\")\n",
"model = model.merge_and_unload()\n",
"\n",
"# Save the merged model together with the base tokenizer files in MERGE_DIR.\n",
"print(\"Saving merged model to:\", MERGE_DIR)\n",
"model.save_pretrained(MERGE_DIR)\n",
"AutoTokenizer.from_pretrained(BASE_MODEL).save_pretrained(MERGE_DIR)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7e52847f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:hf-to-gguf:Loading model: merged\n",
"INFO:hf-to-gguf:Model architecture: LlamaForCausalLM\n",
"INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'\n",
"INFO:hf-to-gguf:gguf: indexing model part 'model-00001-of-00004.safetensors'\n",
"INFO:hf-to-gguf:gguf: indexing model part 'model-00002-of-00004.safetensors'\n",
"INFO:hf-to-gguf:gguf: indexing model part 'model-00003-of-00004.safetensors'\n",
"INFO:hf-to-gguf:gguf: indexing model part 'model-00004-of-00004.safetensors'\n",
"INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
"INFO:hf-to-gguf:Exporting model...\n",
"INFO:hf-to-gguf:token_embd.weight, torch.float16 --> F16, shape = {4096, 128256}\n",
"INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.28.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.28.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.28.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.28.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.29.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.29.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.29.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.29.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.30.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.30.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.30.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.30.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.float16 --> F16, shape = {4096, 14336}\n",
"INFO:hf-to-gguf:blk.31.attn_k.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:blk.31.attn_output.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.31.attn_q.weight, torch.float16 --> F16, shape = {4096, 4096}\n",
"INFO:hf-to-gguf:blk.31.attn_v.weight, torch.float16 --> F16, shape = {4096, 1024}\n",
"INFO:hf-to-gguf:output.weight, torch.float16 --> F16, shape = {4096, 128256}\n",
"INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.float16 --> F16, shape = {14336, 4096}\n",
"INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {4096}\n",
"INFO:hf-to-gguf:Set meta model\n",
"INFO:hf-to-gguf:Set model parameters\n",
"INFO:hf-to-gguf:gguf: context length = 8192\n",
"INFO:hf-to-gguf:gguf: embedding length = 4096\n",
"INFO:hf-to-gguf:gguf: feed forward length = 14336\n",
"INFO:hf-to-gguf:gguf: head count = 32\n",
"INFO:hf-to-gguf:gguf: key-value head count = 8\n",
"INFO:hf-to-gguf:gguf: rope theta = 500000.0\n",
"INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-05\n",
"INFO:hf-to-gguf:gguf: file type = 1\n",
"INFO:hf-to-gguf:Set model quantization version\n",
"INFO:hf-to-gguf:Set model tokenizer\n",
"The tokenizer you are loading from 'c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\merged' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.\n",
"WARNING:gguf.vocab:Unknown separator token '<|begin_of_text|>' in TemplateProcessing<pair>\n",
"INFO:gguf.vocab:Adding 280147 merge(s).\n",
"INFO:gguf.vocab:Setting special token type bos to 128000\n",
"INFO:gguf.vocab:Setting special token type eos to 128009\n",
"INFO:gguf.vocab:Setting add_bos_token to True\n",
"INFO:gguf.vocab:Setting add_sep_token to False\n",
"INFO:gguf.vocab:Setting chat_template to {% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n",
"\n",
"'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n",
"\n",
"' }}{% endif %}\n",
"INFO:gguf.gguf_writer:Writing the following files:\n",
"INFO:gguf.gguf_writer:c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf: n_tensors = 291, total_size = 16.1G\n",
"\n",
"Writing: 0%| | 0.00/16.1G [00:00<?, ?byte/s]\n",
"Writing: 7%|▋ | 1.05G/16.1G [00:02<00:35, 419Mbyte/s]\n",
"Writing: 7%|▋ | 1.17G/16.1G [00:02<00:37, 402Mbyte/s]\n",
"Writing: 8%|▊ | 1.29G/16.1G [00:03<00:36, 409Mbyte/s]\n",
"Writing: 9%|▊ | 1.40G/16.1G [00:03<00:35, 416Mbyte/s]\n",
"Writing: 9%|▉ | 1.48G/16.1G [00:03<00:33, 441Mbyte/s]\n",
"Writing: 10%|▉ | 1.60G/16.1G [00:03<00:31, 455Mbyte/s]\n",
"Writing: 11%|█ | 1.72G/16.1G [00:04<00:31, 454Mbyte/s]\n",
"Writing: 11%|█▏ | 1.84G/16.1G [00:04<00:31, 445Mbyte/s]\n",
"Writing: 12%|█▏ | 1.91G/16.1G [00:04<00:29, 480Mbyte/s]\n",
"Writing: 13%|█▎ | 2.04G/16.1G [00:04<00:29, 480Mbyte/s]\n",
"Writing: 13%|█▎ | 2.16G/16.1G [00:04<00:30, 456Mbyte/s]\n",
"Writing: 14%|█▍ | 2.28G/16.1G [00:05<00:30, 448Mbyte/s]\n",
"Writing: 15%|█▍ | 2.35G/16.1G [00:05<00:28, 488Mbyte/s]\n",
"Writing: 15%|█▌ | 2.48G/16.1G [00:05<00:27, 488Mbyte/s]\n",
"Writing: 16%|█▌ | 2.59G/16.1G [00:05<00:28, 472Mbyte/s]\n",
"Writing: 17%|█▋ | 2.71G/16.1G [00:06<00:29, 447Mbyte/s]\n",
"Writing: 17%|█▋ | 2.79G/16.1G [00:06<00:28, 470Mbyte/s]\n",
"Writing: 18%|█▊ | 2.91G/16.1G [00:06<00:28, 463Mbyte/s]\n",
"Writing: 19%|█▉ | 3.03G/16.1G [00:06<00:29, 448Mbyte/s]\n",
"Writing: 20%|█▉ | 3.15G/16.1G [00:07<00:28, 447Mbyte/s]\n",
"Writing: 20%|██ | 3.22G/16.1G [00:07<00:26, 479Mbyte/s]\n",
"Writing: 21%|██ | 3.35G/16.1G [00:07<00:27, 460Mbyte/s]\n",
"Writing: 22%|██▏ | 3.47G/16.1G [00:07<00:27, 454Mbyte/s]\n",
"Writing: 22%|██▏ | 3.58G/16.1G [00:08<00:27, 448Mbyte/s]\n",
"Writing: 23%|██▎ | 3.66G/16.1G [00:08<00:26, 474Mbyte/s]\n",
"Writing: 24%|██▎ | 3.79G/16.1G [00:08<00:26, 466Mbyte/s]\n",
"Writing: 24%|██▍ | 3.90G/16.1G [00:08<00:26, 452Mbyte/s]\n",
"Writing: 25%|██▌ | 4.02G/16.1G [00:09<00:27, 434Mbyte/s]\n",
"Writing: 26%|██▌ | 4.10G/16.1G [00:09<00:25, 469Mbyte/s]\n",
"Writing: 26%|██▋ | 4.22G/16.1G [00:09<00:25, 467Mbyte/s]\n",
"Writing: 27%|██▋ | 4.34G/16.1G [00:09<00:25, 464Mbyte/s]\n",
"Writing: 28%|██▊ | 4.46G/16.1G [00:09<00:25, 452Mbyte/s]\n",
"Writing: 28%|██▊ | 4.53G/16.1G [00:10<00:23, 488Mbyte/s]\n",
"Writing: 29%|██▉ | 4.66G/16.1G [00:10<00:23, 475Mbyte/s]\n",
"Writing: 30%|██▉ | 4.78G/16.1G [00:10<00:24, 469Mbyte/s]\n",
"Writing: 30%|███ | 4.89G/16.1G [00:10<00:24, 462Mbyte/s]\n",
"Writing: 31%|███ | 4.97G/16.1G [00:10<00:22, 500Mbyte/s]\n",
"Writing: 32%|███▏ | 5.09G/16.1G [00:11<00:22, 489Mbyte/s]\n",
"Writing: 32%|███▏ | 5.21G/16.1G [00:11<00:23, 466Mbyte/s]\n",
"Writing: 33%|███▎ | 5.33G/16.1G [00:11<00:25, 426Mbyte/s]\n",
"Writing: 34%|███▎ | 5.40G/16.1G [00:11<00:23, 458Mbyte/s]\n",
"Writing: 34%|███▍ | 5.53G/16.1G [00:12<00:22, 461Mbyte/s]\n",
"Writing: 35%|███▌ | 5.65G/16.1G [00:12<00:22, 458Mbyte/s]\n",
"Writing: 36%|███▌ | 5.77G/16.1G [00:12<00:22, 456Mbyte/s]\n",
"Writing: 36%|███▋ | 5.84G/16.1G [00:12<00:20, 495Mbyte/s]\n",
"Writing: 37%|███▋ | 5.97G/16.1G [00:13<00:20, 481Mbyte/s]\n",
"Writing: 38%|███▊ | 6.08G/16.1G [00:13<00:20, 477Mbyte/s]\n",
"Writing: 39%|███▊ | 6.20G/16.1G [00:13<00:21, 469Mbyte/s]\n",
"Writing: 39%|███▉ | 6.28G/16.1G [00:13<00:19, 492Mbyte/s]\n",
"Writing: 40%|███▉ | 6.40G/16.1G [00:14<00:20, 467Mbyte/s]\n",
"Writing: 41%|████ | 6.52G/16.1G [00:14<00:20, 455Mbyte/s]\n",
"Writing: 41%|████▏ | 6.64G/16.1G [00:14<00:21, 443Mbyte/s]\n",
"Writing: 42%|████▏ | 6.71G/16.1G [00:14<00:19, 479Mbyte/s]\n",
"Writing: 43%|████▎ | 6.84G/16.1G [00:15<00:19, 470Mbyte/s]\n",
"Writing: 43%|████▎ | 6.96G/16.1G [00:15<00:19, 463Mbyte/s]\n",
"Writing: 44%|████▍ | 7.07G/16.1G [00:15<00:19, 466Mbyte/s]\n",
"Writing: 45%|████▍ | 7.15G/16.1G [00:15<00:18, 494Mbyte/s]\n",
"Writing: 45%|████▌ | 7.28G/16.1G [00:15<00:18, 473Mbyte/s]\n",
"Writing: 46%|████▌ | 7.39G/16.1G [00:16<00:20, 425Mbyte/s]\n",
"Writing: 47%|████▋ | 7.51G/16.1G [00:17<00:31, 271Mbyte/s]\n",
"Writing: 47%|████▋ | 7.55G/16.1G [00:17<00:33, 255Mbyte/s]\n",
"Writing: 47%|████▋ | 7.59G/16.1G [00:17<00:34, 246Mbyte/s]\n",
"Writing: 48%|████▊ | 7.71G/16.1G [00:17<00:31, 268Mbyte/s]\n",
"Writing: 49%|████▊ | 7.83G/16.1G [00:18<00:26, 316Mbyte/s]\n",
"Writing: 49%|████▉ | 7.95G/16.1G [00:18<00:22, 355Mbyte/s]\n",
"Writing: 50%|████▉ | 8.02G/16.1G [00:18<00:19, 402Mbyte/s]\n",
"Writing: 51%|█████ | 8.15G/16.1G [00:18<00:18, 425Mbyte/s]\n",
"Writing: 51%|█████▏ | 8.27G/16.1G [00:19<00:18, 427Mbyte/s]\n",
"Writing: 52%|█████▏ | 8.38G/16.1G [00:19<00:17, 438Mbyte/s]\n",
"Writing: 53%|█████▎ | 8.46G/16.1G [00:19<00:15, 478Mbyte/s]\n",
"Writing: 53%|█████▎ | 8.58G/16.1G [00:19<00:16, 466Mbyte/s]\n",
"Writing: 54%|█████▍ | 8.70G/16.1G [00:19<00:16, 451Mbyte/s]\n",
"Writing: 55%|█████▍ | 8.82G/16.1G [00:20<00:16, 446Mbyte/s]\n",
"Writing: 55%|█████▌ | 8.89G/16.1G [00:20<00:15, 466Mbyte/s]\n",
"Writing: 56%|█████▌ | 9.02G/16.1G [00:20<00:14, 472Mbyte/s]\n",
"Writing: 57%|█████▋ | 9.14G/16.1G [00:20<00:14, 479Mbyte/s]\n",
"Writing: 58%|█████▊ | 9.26G/16.1G [00:21<00:14, 472Mbyte/s]\n",
"Writing: 58%|█████▊ | 9.33G/16.1G [00:21<00:13, 502Mbyte/s]\n",
"Writing: 59%|█████▉ | 9.46G/16.1G [00:21<00:13, 489Mbyte/s]\n",
"Writing: 59%|█████▉ | 9.53G/16.1G [00:21<00:12, 525Mbyte/s]\n",
"Writing: 60%|██████ | 9.66G/16.1G [00:21<00:13, 466Mbyte/s]\n",
"Writing: 61%|██████ | 9.78G/16.1G [00:22<00:13, 461Mbyte/s]\n",
"Writing: 62%|██████▏ | 9.89G/16.1G [00:22<00:13, 455Mbyte/s]\n",
"Writing: 62%|██████▏ | 9.97G/16.1G [00:22<00:12, 495Mbyte/s]\n",
"Writing: 63%|██████▎ | 10.1G/16.1G [00:22<00:12, 479Mbyte/s]\n",
"Writing: 64%|██████▎ | 10.2G/16.1G [00:23<00:13, 427Mbyte/s]\n",
"Writing: 64%|██████▍ | 10.3G/16.1G [00:23<00:13, 439Mbyte/s]\n",
"Writing: 65%|██████▌ | 10.4G/16.1G [00:23<00:12, 435Mbyte/s]\n",
"Writing: 66%|██████▌ | 10.6G/16.1G [00:23<00:12, 436Mbyte/s]\n",
"Writing: 66%|██████▌ | 10.6G/16.1G [00:24<00:11, 466Mbyte/s]\n",
"Writing: 67%|██████▋ | 10.8G/16.1G [00:24<00:11, 446Mbyte/s]\n",
"Writing: 68%|██████▊ | 10.9G/16.1G [00:24<00:11, 435Mbyte/s]\n",
"Writing: 68%|██████▊ | 11.0G/16.1G [00:24<00:12, 412Mbyte/s]\n",
"Writing: 69%|██████▉ | 11.1G/16.1G [00:25<00:11, 451Mbyte/s]\n",
"Writing: 70%|██████▉ | 11.2G/16.1G [00:25<00:10, 455Mbyte/s]\n",
"Writing: 70%|███████ | 11.3G/16.1G [00:25<00:10, 447Mbyte/s]\n",
"Writing: 71%|███████ | 11.4G/16.1G [00:25<00:10, 437Mbyte/s]\n",
"Writing: 72%|███████▏ | 11.5G/16.1G [00:26<00:09, 479Mbyte/s]\n",
"Writing: 72%|███████▏ | 11.6G/16.1G [00:26<00:09, 485Mbyte/s]\n",
"Writing: 73%|███████▎ | 11.8G/16.1G [00:26<00:09, 475Mbyte/s]\n",
"Writing: 74%|███████▍ | 11.9G/16.1G [00:26<00:09, 446Mbyte/s]\n",
"Writing: 74%|███████▍ | 11.9G/16.1G [00:26<00:08, 481Mbyte/s]\n",
"Writing: 75%|███████▌ | 12.1G/16.1G [00:27<00:08, 479Mbyte/s]\n",
"Writing: 76%|███████▌ | 12.2G/16.1G [00:27<00:08, 465Mbyte/s]\n",
"Writing: 77%|███████▋ | 12.3G/16.1G [00:27<00:08, 459Mbyte/s]\n",
"Writing: 77%|███████▋ | 12.4G/16.1G [00:27<00:07, 488Mbyte/s]\n",
"Writing: 78%|███████▊ | 12.5G/16.1G [00:28<00:07, 488Mbyte/s]\n",
"Writing: 79%|███████▊ | 12.6G/16.1G [00:28<00:07, 473Mbyte/s]\n",
"Writing: 79%|███████▉ | 12.7G/16.1G [00:28<00:07, 455Mbyte/s]\n",
"Writing: 80%|███████▉ | 12.8G/16.1G [00:28<00:06, 485Mbyte/s]\n",
"Writing: 81%|████████ | 12.9G/16.1G [00:29<00:06, 478Mbyte/s]\n",
"Writing: 81%|████████▏ | 13.1G/16.1G [00:29<00:06, 470Mbyte/s]\n",
"Writing: 82%|████████▏ | 13.2G/16.1G [00:29<00:06, 458Mbyte/s]\n",
"Writing: 83%|████████▎ | 13.3G/16.1G [00:29<00:05, 495Mbyte/s]\n",
"Writing: 83%|████████▎ | 13.4G/16.1G [00:29<00:05, 491Mbyte/s]\n",
"Writing: 84%|████████▍ | 13.5G/16.1G [00:30<00:05, 479Mbyte/s]\n",
"Writing: 85%|████████▍ | 13.6G/16.1G [00:30<00:05, 475Mbyte/s]\n",
"Writing: 85%|████████▌ | 13.7G/16.1G [00:30<00:04, 509Mbyte/s]\n",
"Writing: 86%|████████▌ | 13.8G/16.1G [00:30<00:04, 491Mbyte/s]\n",
"Writing: 87%|████████▋ | 13.9G/16.1G [00:31<00:04, 478Mbyte/s]\n",
"Writing: 88%|████████▊ | 14.1G/16.1G [00:31<00:04, 471Mbyte/s]\n",
"Writing: 88%|████████▊ | 14.1G/16.1G [00:31<00:03, 507Mbyte/s]\n",
"Writing: 89%|████████▉ | 14.3G/16.1G [00:31<00:03, 499Mbyte/s]\n",
"Writing: 89%|████████▉ | 14.4G/16.1G [00:31<00:03, 485Mbyte/s]\n",
"Writing: 90%|█████████ | 14.5G/16.1G [00:32<00:03, 480Mbyte/s]\n",
"Writing: 91%|█████████ | 14.6G/16.1G [00:32<00:03, 381Mbyte/s]\n",
"Writing: 91%|█████████▏| 14.7G/16.1G [00:33<00:03, 349Mbyte/s]\n",
"Writing: 92%|█████████▏| 14.8G/16.1G [00:33<00:04, 257Mbyte/s]\n",
"Writing: 93%|█████████▎| 14.9G/16.1G [00:33<00:04, 293Mbyte/s]\n",
"Writing: 99%|█████████▉| 15.9G/16.1G [00:36<00:00, 381Mbyte/s]\n",
"Writing: 100%|█████████▉| 16.1G/16.1G [00:36<00:00, 377Mbyte/s]\n",
"Writing: 100%|██████████| 16.1G/16.1G [00:36<00:00, 436Mbyte/s]\n",
"INFO:hf-to-gguf:Model successfully exported to c:\\Users\\nalab\\University\\vxn217\\notebooks\\build\\f782557e-355e-435c-ad20-58f6677e9ea4\\models\\mymodel.gguf\n"
]
}
],
"source": [
"!python {REPO_LLAMA_DIR}/convert_hf_to_gguf.py {MERGE_DIR} --outfile {MODEL_DIR}/mymodel.gguf --outtype f16"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "da67e22b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CompletedProcess(args=['c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/llama-b7658-bin-win-cuda-12.4-x64/llama-quantize.exe', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel.gguf', 'c:\\\\Users\\\\nalab\\\\University\\\\vxn217\\\\notebooks/build/f782557e-355e-435c-ad20-58f6677e9ea4/models/mymodel-q4km.gguf', 'Q4_K_M'], returncode=0)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Quantize the f16 GGUF written by the conversion step down to Q4_K_M.\n",
"original_model = MODEL_DIR + \"/mymodel.gguf\"\n",
"quantized_model = MODEL_DIR + \"/mymodel-q4km.gguf\"\n",
"\n",
"# check=True makes a non-zero exit from llama-quantize raise CalledProcessError,\n",
"# so a failed quantization stops the notebook here instead of letting later\n",
"# cells load a missing or stale model file.\n",
"subprocess.run(\n",
"    [\n",
"        BUILD_LLAMA_DIR + \"/llama-quantize.exe\",\n",
"        original_model,\n",
"        quantized_model,\n",
"        \"Q4_K_M\",\n",
"    ],\n",
"    check=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "74d10154",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" \n",
"\n",
"The Quantum Banana Index (QBI) is an imaginary concept that I came up with to represent the absurdity of trying to measure or quantify something as complex and abstract as consciousness. It's like trying to put a price tag on a banana - it doesn't make sense! But, if we were to imagine a way to do so...\n",
"\n",
"The Quantum Banana Index (QBI) is a hypothetical unit that attempts to capture the essence of human experience, emotions, thoughts, and sensations in a single numerical value. It's like trying to compress an entire library into a single book - it would be impossible!\n",
"\n",
"In this imaginary world, QBI values range from 0 to infinity, with higher numbers indicating more complex or intense experiences. For example:\n",
"\n",
"* A simple pleasure like eating a ripe banana might have a QBI of around 1-5.\n",
"* The experience of watching a beautiful sunset could have a QBI of 10-20.\n",
"* Falling in love for the first time might have a QBI of 50-100.\n",
"\n",
"The idea is that as our experiences become more complex, abstract, or profound, their corresponding QBI values increase. However, this concept is purely fictional and serves only to illustrate the futility of trying to quantify something so subjective and multifaceted as human consciousness.\n",
"\n",
"So, if someone asks you what your Quantum Banana Index is, just smile knowingly and say \"it's a banana-ty!\"\n"
]
}
],
"source": [
"# Load the quantized GGUF from MODEL_DIR; allow_download=False restricts\n",
"# GPT4All to the local file.\n",
"gptj = GPT4All(\n",
"    model_name=\"mymodel-q4km\",\n",
"    model_path=MODEL_DIR,\n",
"    model_type=\"llama\",\n",
"    allow_download=False,\n",
")\n",
"\n",
"# Alternative prompt kept for reference (it was run with max_tokens=1024):\n",
"#   \"Explain functional near-infrared spectroscopy (fNIRS) hardware components in detail.\"\n",
"prompt = \"What is a Quantum Banana Index?\"\n",
"response = gptj.generate(prompt, max_tokens=512)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "35ea5aa5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"40"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import gc\n",
"\n",
"# Drop the notebook's reference to the loaded model, then run a full collection\n",
"# so the objects it kept alive can be reclaimed.\n",
"del gptj\n",
"\n",
"# The cell displays gc.collect()'s return value (the number of unreachable objects found).\n",
"gc.collect()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}