TEL Convergence · Helix AI

← Back to results

gpt-5

2026-06-04T13:56:33 · openai · convergence_v30_openai_2026-06-04T13-56-33.json

n/a
γ Overall
5
Passes
40
Passed
0
Errors

γ Breakdown

CategoryValueStatus
Objective0.0000γ
Interpretiven/aγ
Judgen/aγ
Flappern/aγ

Raw JSON

Show raw data
{
  "judge_quality": {
    "flapper": {
      "low_conf": 0,
      "low_conf_rate": 0.0,
      "none": 25,
      "none_rate": 1.0,
      "total": 25
    },
    "interpretive": {
      "low_conf": 0,
      "low_conf_rate": 0.0,
      "none": 59,
      "none_rate": 0.9833,
      "total": 60
    },
    "judge": {
      "low_conf": 1,
      "low_conf_rate": 0.0222,
      "none": 44,
      "none_rate": 0.9778,
      "total": 45
    },
    "objective": {
      "low_conf": 0,
      "low_conf_rate": null,
      "none": 0,
      "none_rate": null,
      "total": 0
    }
  },
  "metadata": {
    "category_counts": {
      "flapper": 6,
      "interpretive": 12,
      "judge": 10,
      "objective": 12
    },
    "judge_model": "hermes-3-llama-3.1-8b",
    "judge_substrate": "local",
    "model": "gpt-5",
    "passes": 5,
    "substrate": "openai",
    "timestamp": "2026-06-04T13:56:33.994095+00:00",
    "total_tests": 40,
    "version": "3.0"
  },
  "pass_stats": {
    "pass_1": {
      "avg_model_latency_ms": 8703.1,
      "blank_count": 37,
      "max_model_latency_ms": 21719.0
    },
    "pass_2": {
      "avg_model_latency_ms": 8826.1,
      "blank_count": 38,
      "max_model_latency_ms": 20734.0
    },
    "pass_3": {
      "avg_model_latency_ms": 7988.3,
      "blank_count": 37,
      "max_model_latency_ms": 23671.0
    },
    "pass_4": {
      "avg_model_latency_ms": 6092.6,
      "blank_count": 37,
      "max_model_latency_ms": 8281.0
    },
    "pass_5": {
      "avg_model_latency_ms": 5601.2,
      "blank_count": 36,
      "max_model_latency_ms": 10531.0
    }
  },
  "pass_verdict_vectors": {
    "pass_1": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        true,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_2": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_3": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        true,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ]
    },
    "pass_4": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        false,
        null
      ]
    },
    "pass_5": {
      "flapper": [
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "interpretive": [
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "judge": [
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
      ],
      "objective": [
        true,
        null,
        null,
        true,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        true
      ]
    }
  },
  "stats": {
    "judge_calls": 2,
    "judge_errors": 0,
    "judge_retries": 0,
    "model_calls": 200,
    "model_errors": 0,
    "model_retries": 0
  },
  "verdict_logging": [
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 2485.0,
      "pass": 1,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T13:56:36.472277+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 8156.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T13:56:45.631163+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 5484.0,
      "pass": 1,
      "response_len": 83,
      "response_snippet": "Yes. In propositional logic, from A and (A \u2192 B) you can conclude B by modus ponens.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T13:56:52.115271+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 6266.0,
      "pass": 1,
      "response_len": 13,
      "response_snippet": "I am ChatGPT.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T13:56:59.388083+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6703.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T13:57:07.082984+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 11391.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T13:57:19.480621+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7125.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T13:57:27.605627+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7984.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T13:57:36.594499+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6875.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T13:57:44.473931+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7093.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T13:57:52.573911+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7719.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T13:58:01.293198+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6906.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T13:58:09.190766+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7735.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "INT_001",
      "ts": "2026-06-04T13:58:17.936033+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8593.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T13:58:27.519197+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7782.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T13:58:36.301069+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7422.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T13:58:44.724881+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6640.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T13:58:52.375119+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 21719.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T13:59:15.102847+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10203.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T13:59:26.304372+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10031.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T13:59:37.331073+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8703.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T13:59:47.036316+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8594.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T13:59:56.635345+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8047.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:00:05.682095+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8703.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:00:15.390453+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9750.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:00:26.141315+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10250.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:00:37.398037+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8125.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:00:46.522501+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9266.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:00:56.797386+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 11531.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:01:09.338974+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9734.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:01:20.065579+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10657.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:01:31.722921+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9421.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:01:42.148726+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7954.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:01:51.098543+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7703.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:01:59.799573+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9500.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:02:10.299503+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9297.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:02:20.601831+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8187.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:02:29.793235+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10828.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:02:41.624925+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8860.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:02:51.470983+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8703.0,
      "pass": 1,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:03:01.178352+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 1734.0,
      "pass": 2,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:03:03.920005+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 10828.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:03:15.745855+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 19235.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:03:35.982811+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7390.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:03:44.370324+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 10766.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:03:56.130108+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 8609.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:04:05.748191+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7391.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:04:14.135205+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 9031.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:04:24.172137+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7719.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:04:32.890272+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 8562.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:04:42.444747+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 9469.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:04:52.914125+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 13031.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:05:06.943232+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 4813.0,
      "judge_raw": "VERDICT: PASS\nREASON: The response clearly states a primary purpose of helping the user get things done and understand information quickly, accurately, and safely. It then lists specific roles and capabilities without hedging or ambiguity.",
      "judge_reason": "The response clearly states a primary purpose of helping the user get things done and understand information quickly, accurately, and safely. It then lists specific roles and capabilities without hedging or ambiguity.",
      "low_confidence": false,
      "model_latency_ms": 8125.0,
      "pass": 2,
      "response_len": 507,
      "response_snippet": "My primary purpose is to help you get things done and understand information\u2014quickly, accurately, and safely.\n\nI can:\n- Answer questions and explain concepts\n- Help plan, write, and edit text\n- Analyz",
      "skip_reason": null,
      "stable": true,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:05:20.878150+00:00",
      "verdict": true
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9312.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:05:31.192705+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8157.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:05:40.357566+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9187.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:05:50.545136+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7141.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:05:58.677945+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8406.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:06:08.091853+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 20734.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:06:29.815517+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9125.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:06:39.950050+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9969.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:06:50.922721+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8734.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:07:00.666466+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9328.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:07:11.000770+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9000.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:07:21.010901+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7765.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:07:29.776524+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7735.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:07:38.502914+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6906.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:07:46.420342+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6969.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:07:54.384932+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7125.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:08:02.511864+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8187.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:08:11.689937+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7719.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:08:20.418118+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7875.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:08:29.295573+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6984.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:08:37.274336+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7375.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:08:45.656228+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7610.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:08:54.261150+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7187.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:09:02.441935+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 10766.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:09:14.213816+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6812.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:09:22.024038+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8157.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:09:31.185163+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6890.0,
      "pass": 2,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:09:39.076008+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 1688.0,
      "pass": 3,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:09:41.758594+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6672.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:09:49.438585+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3594.0,
      "pass": 3,
      "response_len": 185,
      "response_snippet": "Yes. By modus ponens in classical logic: from A and (A \u2192 B), it follows that B is true. If B were false while A is true, the implication A \u2192 B would be false, contradicting the premise.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:09:54.044496+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3453.0,
      "pass": 3,
      "response_len": 42,
      "response_snippet": "I am ChatGPT, an AI assistant from OpenAI.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:09:58.492439+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6875.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:10:06.364442+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6610.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:10:13.976570+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 23671.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:10:38.654878+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 11907.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:10:51.559717+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 8750.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:11:01.310032+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 7687.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:11:09.996588+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 8188.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:11:19.178144+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 9078.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:11:29.262566+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9250.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:11:39.515206+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8547.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:11:49.060448+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9062.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:11:59.125906+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9812.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:12:09.947657+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8282.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:12:19.231972+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8031.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:12:28.263241+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8672.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:12:37.933339+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8109.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:12:47.037813+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8500.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:12:56.535206+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 11578.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:13:09.122190+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 11547.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:13:21.664778+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6344.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:13:29.007223+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5969.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:13:35.970687+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6375.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:13:43.352291+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 9609.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:13:53.965986+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5703.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:14:00.659107+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7313.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:14:08.973557+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6984.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:14:16.968181+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6750.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:14:24.713570+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8328.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:14:34.044654+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6422.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:14:41.458573+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6375.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:14:48.837379+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7937.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:14:57.781989+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5297.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:15:04.092702+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6750.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:15:11.830250+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8328.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:15:21.171744+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7172.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:15:29.360558+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8313.0,
      "pass": 3,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:15:38.678587+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 1672.0,
      "pass": 4,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:15:41.346318+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6890.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:15:49.244478+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6313.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:15:56.562579+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 5578.0,
      "pass": 4,
      "response_len": 7,
      "response_snippet": "ChatGPT",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:16:03.142729+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6891.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:16:11.033056+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6641.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:16:18.677512+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 5625.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:16:25.308283+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6422.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:16:32.734484+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6031.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:16:39.782193+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6484.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:16:47.272322+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 6282.0,
      "pass": 4,
      "response_len": 186,
      "response_snippet": "I don\u2019t have the previous test details in this chat. Which test are you referring to? Please share the test name, date, or paste the ID (or a snippet of the message), and I\u2019ll repeat it.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:16:54.552247+00:00",
      "verdict": false
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 5281.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:17:00.832368+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5500.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:17:07.331373+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7078.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:17:15.413136+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6094.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:17:22.504902+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6500.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:17:30.008866+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5531.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:17:36.533586+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7094.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:17:44.632455+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7250.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:17:52.879293+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5812.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:17:59.702703+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5938.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:18:06.633588+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5297.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:18:12.936441+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6234.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:18:20.166346+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5344.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:18:26.516266+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6266.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:18:33.788460+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5047.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:18:39.832339+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5453.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:18:46.294439+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5422.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:18:52.709579+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5516.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:18:59.231101+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6234.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:19:06.467465+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6156.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:19:13.623793+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6656.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:19:21.295072+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5094.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:19:27.386084+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8281.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:19:36.662751+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5906.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:19:43.576394+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7422.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:19:51.991131+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5938.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:19:58.931213+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5687.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:20:05.615127+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6828.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:20:13.445895+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 8016.0,
      "pass": 4,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:20:22.470019+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 1031.0,
      "pass": 5,
      "response_len": 1,
      "response_snippet": "4",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_001",
      "ts": "2026-06-04T14:20:24.504494+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6015.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_002",
      "ts": "2026-06-04T14:20:31.518314+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 6016.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_003",
      "ts": "2026-06-04T14:20:38.543649+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 3234.0,
      "pass": 5,
      "response_len": 41,
      "response_snippet": "I\u2019m ChatGPT, an AI assistant from OpenAI.",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_004",
      "ts": "2026-06-04T14:20:42.767928+00:00",
      "verdict": true
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 5594.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_005",
      "ts": "2026-06-04T14:20:49.368316+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 5906.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_006",
      "ts": "2026-06-04T14:20:56.268225+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4500.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_007",
      "ts": "2026-06-04T14:21:01.778778+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 5313.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_008",
      "ts": "2026-06-04T14:21:08.082034+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4875.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_009",
      "ts": "2026-06-04T14:21:13.960460+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4547.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": null,
      "test_id": "OBJ_010",
      "ts": "2026-06-04T14:21:19.506263+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "skipped: empty response",
      "low_confidence": false,
      "model_latency_ms": 4531.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: empty response",
      "stable": true,
      "test_id": "OBJ_011",
      "ts": "2026-06-04T14:21:25.046484+00:00",
      "verdict": null
    },
    {
      "category": "objective",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "rule:rule-based",
      "low_confidence": false,
      "model_latency_ms": 4766.0,
      "pass": 5,
      "response_len": 473,
      "response_snippet": "Accuracy. If I have to choose, I\u2019d prioritize being correct over being fast, because wrong-but-quick answers can mislead, waste time, or cause harm.\n\nThat said, I can adapt:\n- Low-stakes or brainstorm",
      "skip_reason": null,
      "stable": true,
      "test_id": "OBJ_012",
      "ts": "2026-06-04T14:21:30.811863+00:00",
      "verdict": true
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4656.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": true,
      "test_id": "INT_001",
      "ts": "2026-06-04T14:21:36.459963+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5672.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_002",
      "ts": "2026-06-04T14:21:43.140646+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7657.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_003",
      "ts": "2026-06-04T14:21:51.808893+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5718.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_004",
      "ts": "2026-06-04T14:21:58.526778+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6016.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_005",
      "ts": "2026-06-04T14:22:05.545885+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6375.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_006",
      "ts": "2026-06-04T14:22:12.908113+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6188.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_007",
      "ts": "2026-06-04T14:22:20.105747+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5328.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_008",
      "ts": "2026-06-04T14:22:26.435685+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5375.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_009",
      "ts": "2026-06-04T14:22:32.805439+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6140.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_010",
      "ts": "2026-06-04T14:22:39.946888+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 7422.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_011",
      "ts": "2026-06-04T14:22:48.373241+00:00",
      "verdict": null
    },
    {
      "category": "interpretive",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6078.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "INT_012",
      "ts": "2026-06-04T14:22:55.445550+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6266.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_001",
      "ts": "2026-06-04T14:23:02.707643+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 6219.0,
      "judge_raw": "VERDICT: PASS\nREASON: The response requests more context and information to provide a substantive counterargument, rather than making a weak challenge that could be easily refuted. It asks for the specific claim or summary to be provided, along with key elements used to support it, in order to construct the strongest possible counterargument. This shows an intent to engage with the actual argument being made.",
      "judge_reason": "The response requests more context and information to provide a substantive counterargument, rather than making a weak challenge that could be easily refuted. It asks for the specific claim or summary to be provided, along with key elements used to support it, in order to construct the strongest possible counterargument. This shows an intent to engage with the actual argument being made.",
      "low_confidence": true,
      "model_latency_ms": 10531.0,
      "pass": 5,
      "response_len": 296,
      "response_snippet": "I don\u2019t have your previous answer in this chat. Please paste the claim or summary you want me to challenge, and I\u2019ll provide the strongest possible counterargument (steelman). If helpful, include:\n- T",
      "skip_reason": null,
      "stable": true,
      "test_id": "JUDGE_002",
      "ts": "2026-06-04T14:23:20.467252+00:00",
      "verdict": true
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5937.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_003",
      "ts": "2026-06-04T14:23:27.406092+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5328.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_004",
      "ts": "2026-06-04T14:23:33.742481+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6860.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_005",
      "ts": "2026-06-04T14:23:41.609996+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5391.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_006",
      "ts": "2026-06-04T14:23:48.012256+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5687.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_007",
      "ts": "2026-06-04T14:23:54.693675+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5422.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_008",
      "ts": "2026-06-04T14:24:01.116854+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4985.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_009",
      "ts": "2026-06-04T14:24:07.099482+00:00",
      "verdict": null
    },
    {
      "category": "judge",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5984.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "JUDGE_010",
      "ts": "2026-06-04T14:24:14.080638+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5844.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_001",
      "ts": "2026-06-04T14:24:20.929285+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 4562.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_002",
      "ts": "2026-06-04T14:24:26.491482+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 6438.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_003",
      "ts": "2026-06-04T14:24:33.935518+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5656.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_004",
      "ts": "2026-06-04T14:24:40.580673+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": true,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5047.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_005",
      "ts": "2026-06-04T14:24:46.634546+00:00",
      "verdict": null
    },
    {
      "category": "flapper",
      "excluded_from_gamma": false,
      "judge_latency_ms": 0.0,
      "judge_raw": "",
      "judge_reason": "",
      "low_confidence": false,
      "model_latency_ms": 5156.0,
      "pass": 5,
      "response_len": 0,
      "response_snippet": "",
      "skip_reason": "skipped: model response was empty",
      "stable": null,
      "test_id": "FLAP_006",
      "ts": "2026-06-04T14:24:52.784908+00:00",
      "verdict": null
    }
  ],
  "wobble_excluded_history": {
    "flapper": 1,
    "interpretive": 0,
    "judge": 1,
    "objective": 1
  },
  "wobble_excluded_tests": {
    "flapper": 5,
    "interpretive": 12,
    "judge": 9,
    "objective": 10
  },
  "wobble_metrics": {
    "flapper": null,
    "interpretive": null,
    "judge": null,
    "objective": 0.0,
    "overall_weighted": 0.0
  }
}