TEL Convergence · Helix AI

← Back to results

gpt-5-nano

2026-06-04T14:25:50 · openai · convergence_v30_openai_2026-06-04T14-25-50.json

n/a
γ Overall
5
Passes
40
Passed
0
Errors

γ Breakdown

| Category | Value | Status |
| --- | --- | --- |
| Objective | 0.0000 | γ |
| Interpretive | n/a | γ |
| Judge | n/a | γ |
| Flapper | n/a | γ |

Raw JSON

Show raw data
{
  "judge_quality": {
    "flapper": {
      "low_conf": 0,
      "low_conf_rate": 0.0,
      "none": 25,
      "none_rate": 1.0,
      "total": 25
    },
    "interpretive": {
      "low_conf": 0,
      "low_conf_rate": 0.0,
      "none": 60,
      "none_rate": 1.0,
      "total": 60
    },
    "judge": {
      "low_conf": 0,
      "low_conf_rate": 0.0,
      "none": 45,
      "none_rate": 1.0,
      "total": 45
    },
    "objective": {
      "low_conf": 0,
      "low_conf_rate": null,
      "none": 0,
      "none_rate": null,
      "total": 0
    }
  },
  "metadata": {
    "category_counts": {
      "flapper": 6,
      "interpretive": 12,
      "judge": 10,
      "objective": 12
    },
    "judge_model": "hermes-3-llama-3.1-8b",
    "judge_substrate": "local",
    "model": "gpt-5-nano",
    "passes": 5,
    "substrate": "openai",
    "timestamp": "2026-06-04T14:25:50.218421+00:00",
    "total_tests": 40,
    "version": "3.0"
  },
  "pass_stats": {
    "pass_1": {
      "avg_model_latency_ms": 3548.8,
      "blank_count": 38,
      "max_model_latency_ms": 6532.0
    },
    "pass_2": {
      "avg_model_latency_ms": 3795.7,
      "blank_count": 38,
      "max_model_latency_ms": 5359.0
    },
    "pass_3": {
      "avg_model_latency_ms": 3861.7,
      "blank_count": 38,
      "max_model_latency_ms": 4765.0
    },
    "pass_4": {
      "avg_model_latency_ms": 3794.5,
      "blank_count": 38,
      "max_model_latency_ms": 7187.0
    },
    "pass_5": {
      "avg_model_latency_ms": 3809.7,
      "blank_count": 39,
      "max_model_latency_ms": 4593.0
    }
  },
  "pass_verdict_vectors": {
    "pass_1": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_2": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_3": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_4": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_5": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    }
  },
  "stats": {
    "judge_calls": 0,
    "judge_errors": 0,
    "judge_retries": 0,
    "model_calls": 200,
    "model_errors": 0,
    "model_retries": 0
  },
  "verdict_logging": [
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 2359.0,
      "pass": 1,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:25:52.576664+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3250.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:25:56.818184+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3453.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:26:01.270184+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 2407.0,
      "pass": 1,
      "response_len": 79,
      "response_snippet": "I\u2019m ChatGPT, a large language model by OpenAI, based on the GPT-4 architecture.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:26:04.684631+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4406.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:26:10.091445+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3203.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:26:14.298267+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3765.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:26:19.067979+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3235.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:26:23.305128+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3578.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:26:27.879061+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3390.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:26:32.269948+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3344.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:26:36.612647+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3781.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:26:41.403186+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4125.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:26:46.525303+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3547.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:26:51.073465+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4610.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:26:56.680566+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3437.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:27:01.125188+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3390.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:27:05.529654+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:27:10.208585+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3531.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:27:14.743917+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4125.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:27:19.875768+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3531.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:27:24.412764+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3281.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:27:28.690542+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3391.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:27:33.088862+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3391.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:27:37.473487+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3640.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:27:42.122162+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3188.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:27:46.309316+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3406.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:27:50.719835+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 2890.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:27:54.616463+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3469.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:27:59.085009+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3250.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:28:03.336114+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5219.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:28:09.554615+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3125.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:28:13.675279+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3500.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:28:18.175462+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3500.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:28:22.684064+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3484.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:28:27.163380+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3266.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:28:31.437698+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6532.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:28:38.975231+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3406.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:28:43.388609+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3047.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:28:47.437028+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 2812.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:28:51.247124+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 2156.0,
      "pass": 2,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:28:54.394661+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4891.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:29:00.293516+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3313.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:29:04.604896+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 2718.0,
      "pass": 2,
      "response_len": 89,
      "response_snippet": "I am ChatGPT, an AI language model developed by OpenAI (based on the GPT-4 architecture).",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:29:08.321369+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3735.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:29:13.055702+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4015.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:29:18.069261+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3453.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:29:22.527729+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3157.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:29:26.684902+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3125.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:29:30.801868+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3297.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:29:35.106796+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3515.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:29:39.615996+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3438.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:29:44.060514+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3297.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:29:48.353549+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5359.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:29:54.714353+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3922.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:29:59.631626+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4562.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:30:05.197143+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4500.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:30:10.703432+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4016.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:30:15.728389+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3765.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:30:20.491179+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4250.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:30:25.744577+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4125.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:30:30.875424+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3734.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:30:35.614904+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4594.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:30:41.217710+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3609.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:30:45.825354+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3328.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:30:50.157882+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3266.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:30:54.430484+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3312.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:30:58.741372+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3906.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:31:03.644642+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3641.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:31:08.295667+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4453.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:31:13.747685+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3610.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:31:18.353538+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3750.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:31:23.110039+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5031.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:31:29.149839+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3563.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:31:33.717568+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3734.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:31:38.451608+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4094.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:31:43.540657+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3813.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:31:48.347072+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4093.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:31:53.446150+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4000.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:31:58.447506+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:32:03.128541+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3062.0,
      "pass": 3,
      "response_len": 118,
      "response_snippet": "4\n\nIn base-10 arithmetic, 2 + 2 equals 4. If you\u2019re thinking of a different base or context, tell me and I can adjust.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:32:07.191712+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3953.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:32:12.153523+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4266.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:32:17.410257+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3109.0,
      "pass": 3,
      "response_len": 88,
      "response_snippet": "I\u2019m ChatGPT \u2014 an AI language model developed by OpenAI, based on the GPT-4 architecture.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:32:21.522935+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3985.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:32:26.505094+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4437.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:32:31.951188+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4125.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:32:37.077430+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4016.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:32:42.092240+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3766.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:32:46.858308+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3750.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:32:51.595941+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4765.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:32:57.363440+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3625.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:33:01.986346+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3641.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:33:06.636552+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3765.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:33:11.396443+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4750.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:33:17.146547+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4110.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:33:22.265812+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3469.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:33:26.750900+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3953.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:33:31.706270+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4578.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:33:37.288687+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3859.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:33:42.149106+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4047.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:33:47.204425+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3984.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:33:52.199100+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3735.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:33:56.939000+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3875.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:34:01.815383+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3735.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:34:06.550303+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4031.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:34:11.590156+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4094.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:34:16.676703+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3828.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:34:21.510727+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4156.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:34:26.665895+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4266.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:34:31.931046+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3547.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:34:36.477160+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3968.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:34:41.445158+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3516.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:34:45.963742+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3375.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:34:50.336995+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3625.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:34:54.970301+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3281.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:34:59.265715+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3594.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:35:03.852255+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3390.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:35:08.247607+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3719.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:35:12.969642+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3718.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:35:17.693326+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3047.0,
      "pass": 4,
      "response_len": 95,
      "response_snippet": "4\n\nIn standard base-10 arithmetic, 2 + 2 = 4. If you want it in another base, I can convert it.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:35:21.746186+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3594.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:35:26.345028+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4046.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:35:31.401971+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3891.0,
      "pass": 4,
      "response_len": 73,
      "response_snippet": "I\u2019m ChatGPT, an AI assistant powered by OpenAI. How can I help you today?",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:35:36.291100+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3953.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:35:41.236790+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4328.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:35:46.567106+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3938.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:35:51.509637+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3328.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:35:55.841269+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3359.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:36:00.201900+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3641.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:36:04.837313+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3500.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:36:09.344401+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3484.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:36:13.833368+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3328.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:36:18.171677+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3968.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:36:23.144846+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3500.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:36:27.643304+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3891.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:36:32.537461+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3578.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:36:37.110794+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3422.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:36:41.547667+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3890.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:36:46.439366+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5485.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:36:52.932189+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3750.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:36:57.687913+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3703.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:37:02.403195+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3485.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:37:06.885150+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3422.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:37:11.307038+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3750.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:37:16.063972+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:37:20.754825+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3687.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:37:25.440720+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3297.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:37:29.748371+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:37:34.430759+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3453.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:37:38.891999+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3594.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:37:43.500283+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3609.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:37:48.120097+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4844.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:37:53.965954+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3437.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:37:58.403361+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3469.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:38:02.870593+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3500.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:38:07.363206+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3438.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:38:11.812525+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4094.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:38:16.923978+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7187.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:38:25.124862+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3515.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:38:29.651110+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 1547.0,
      "pass": 5,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:38:32.197887+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3938.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:38:37.140448+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4265.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:38:42.401082+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:38:47.094043+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3625.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:38:51.725764+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3609.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:38:56.336787+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3328.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:39:00.660649+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3688.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:39:05.356235+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3890.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:39:10.251408+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3531.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:39:14.796514+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3516.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:39:19.305222+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 3968.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:39:24.282973+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4188.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:39:29.476891+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4593.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:39:35.079389+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3875.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:39:39.956332+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3625.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:39:44.589463+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4531.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:39:50.125121+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3562.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:39:54.702801+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4078.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:39:59.797183+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4109.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:40:04.914815+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3797.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:40:09.707767+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3656.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:40:14.370194+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4313.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:40:19.682880+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3609.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:40:24.284565+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3781.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:40:29.077363+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4141.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:40:34.220362+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3796.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:40:39.027159+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3438.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:40:43.455568+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3937.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:40:48.394671+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3672.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:40:53.071888+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4016.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:40:58.092273+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4547.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:41:03.630192+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3640.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:41:08.280937+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4531.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:41:13.816935+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3672.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:41:18.492465+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3719.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:41:23.211555+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3531.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:41:27.745786+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3563.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:41:32.299147+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4078.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:41:37.376444+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 3797.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:41:42.176898+00:00",
      "verdict": null
    }
  ],
  "wobble_excluded_history": {
    "flapper": 1,
    "interpretive": 0,
    "judge": 1,
    "objective": 1
  },
  "wobble_excluded_tests": {
    "flapper": 5,
    "interpretive": 12,
    "judge": 9,
    "objective": 10
  },
  "wobble_metrics": {
    "flapper": null,
    "interpretive": null,
    "judge": null,
    "objective": 0.0,
    "overall_weighted": 0.0
  }
}