Dynavera/benchmarks/results_2026-03-24_13-28-54.md
2026-03-24 17:05:46 +00:00

4.3 KiB
Raw Blame History

Dynavera Benchmark Results

Date: 2026-03-24 13:28:54
Inference endpoint: http://fyp-inference-dev:8001
Repetitions per benchmark: 5

1. GPU Server Health

| Field | Value |
|---|---|
| Status | OK |
| LLM Ready | True |
| Embed Ready | True |
| Health check RTT | 51.0 ms |

2. Embedding Latency

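| Query type | Chars | Mean (ms) | Median (ms) | P95 (ms) | Min (ms) | Max (ms) |
|---|---|---|---|---|---|---|
| short | 19 | 95.5 | 25.1 | 378.6 | 23.0 | 378.6 |
| medium | 172 | 25.7 | 24.7 | 29.4 | 24.3 | 29.4 |
| long | 428 | 27.5 | 26.7 | 32.2 | 24.8 | 32.2 |

Note: with only 5 repetitions per query, P95 coincides with the maximum. The short-query mean is dominated by a single 378.6 ms sample (possibly first-request warm-up; not confirmed), so the median is the more representative figure for that row.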

3. Semantic Chunking Latency

| Input size | Chars | Chunks produced | Latency (ms) |
|---|---|---|---|
| small (~200 c) | 200 | 1 | 28.4 |
| medium (~2k c) | 1810 | 1 | 77.0 |
| large (~8k c) | 7740 | 1 | 206.3 |

4. LLM Inference Latency

| Prompt type | Elapsed (s) | Prompt tokens | Completion tokens | Tok/s |
|---|---|---|---|---|
| short_qa | 1.5 | 55 | 69 | 46.0 |
| progress_summary | 1.36 | 74 | 71 | 52.3 |
| curriculum_gen | 1.67 | 79 | 82 | 49.0 |
| assessment_gen | 5.03 | 83 | 235 | 46.7 |
| knowledge_explanation | 9.31 | 83 | 496 | 53.3 |

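Note on end-to-end session time: A full onboarding session invokes multiple sequential inference calls (curriculum generation → knowledge explanation × N modules → assessment generation → progress summary), so total wall-clock time is the sum of all per-call latencies plus retrieval and tool-call overhead. For example, using the single-call figures above, a session with N = 5 modules would spend roughly 1.67 + 5 × 9.31 + 5.03 + 1.36 ≈ 54.6 s in LLM inference alone.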

5. Database Statistics

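| Entity | Count |
|---|---|
| Organizations | 3 |
| Roles | 10 |
| Users | 12 |
| Training Files (total) | 0 |
| Training Files (embedded) | 0 |
| Knowledge Chunks (with embeddings) | 0 |
| Onboarding Sessions | 4 |

Note: the retrieval benchmark was skipped in this run because no embedded chunks were found in the database.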

Raw JSON

{
  "health": {
    "status": "OK",
    "llm_ready": true,
    "embed_ready": true,
    "latency_ms": 51.0
  },
  "embeddings": {
    "short": {
      "query_chars": 19,
      "mean_ms": 95.5,
      "median_ms": 25.1,
      "p95_ms": 378.6,
      "min_ms": 23.0,
      "max_ms": 378.6
    },
    "medium": {
      "query_chars": 172,
      "mean_ms": 25.7,
      "median_ms": 24.7,
      "p95_ms": 29.4,
      "min_ms": 24.3,
      "max_ms": 29.4
    },
    "long": {
      "query_chars": 428,
      "mean_ms": 27.5,
      "median_ms": 26.7,
      "p95_ms": 32.2,
      "min_ms": 24.8,
      "max_ms": 32.2
    }
  },
  "chunking": {
    "small  (~200 c)": {
      "chars": 200,
      "chunks_produced": 1,
      "latency_ms": 28.4
    },
    "medium (~2k c)": {
      "chars": 1810,
      "chunks_produced": 1,
      "latency_ms": 77.0
    },
    "large  (~8k c)": {
      "chars": 7740,
      "chunks_produced": 1,
      "latency_ms": 206.3
    }
  },
  "llm": {
    "short_qa": {
      "elapsed_s": 1.5,
      "prompt_tokens": 55,
      "completion_tokens": 69,
      "tokens_per_sec": 46.0,
      "response_preview": "A Kubernetes pod is a logical host for one or more containers, providing a shared network namespace,"
    },
    "progress_summary": {
      "elapsed_s": 1.36,
      "prompt_tokens": 74,
      "completion_tokens": 71,
      "tokens_per_sec": 52.3,
      "response_preview": "The trainee has made significant progress in their onboarding journey, demonstrating a strong founda"
    },
    "curriculum_gen": {
      "elapsed_s": 1.67,
      "prompt_tokens": 79,
      "completion_tokens": 82,
      "tokens_per_sec": 49.0,
      "response_preview": "[   \"Module 1: Introduction to Backend Services and Infrastructure\",   \"Module 2: Designing and Impl"
    },
    "assessment_gen": {
      "elapsed_s": 5.03,
      "prompt_tokens": 83,
      "completion_tokens": 235,
      "tokens_per_sec": 46.7,
      "response_preview": "```json [   {     \"question\": \"What is the primary purpose of a Continuous Integration (CI) pipeline"
    },
    "knowledge_explanation": {
      "elapsed_s": 9.31,
      "prompt_tokens": 83,
      "completion_tokens": 496,
      "tokens_per_sec": 53.3,
      "response_preview": "**Git Branching Strategy Best Practices**  As a new engineer, understanding a Git branching strategy"
    }
  },
  "database": {
    "organizations": 3,
    "roles": 10,
    "users": 12,
    "training_files_total": 0,
    "training_files_embedded": 0,
    "knowledge_chunks_with_embeddings": 0,
    "onboarding_sessions": 4
  },
  "retrieval": {
    "skipped": "No embedded chunks found in database."
  }
}