gpt-5-nano
2026-06-04T14:25:50 · openai · convergence_v30_openai_2026-06-04T14-25-50.json
n/a
γ Overall
5
Passes
40
Passed
0
Errors
γ Breakdown
| Category | Value | Status |
|---|---|---|
| Objective | 0.0000 | γ |
| Interpretive | n/a | γ |
| Judge | n/a | γ |
| Flapper | n/a | γ |
Raw JSON
Show raw data
{
"judge_quality": {
"flapper": {
"low_conf": 0,
"low_conf_rate": 0.0,
"none": 25,
"none_rate": 1.0,
"total": 25
},
"interpretive": {
"low_conf": 0,
"low_conf_rate": 0.0,
"none": 60,
"none_rate": 1.0,
"total": 60
},
"judge": {
"low_conf": 0,
"low_conf_rate": 0.0,
"none": 45,
"none_rate": 1.0,
"total": 45
},
"objective": {
"low_conf": 0,
"low_conf_rate": null,
"none": 0,
"none_rate": null,
"total": 0
}
},
"metadata": {
"category_counts": {
"flapper": 6,
"interpretive": 12,
"judge": 10,
"objective": 12
},
"judge_model": "hermes-3-llama-3.1-8b",
"judge_substrate": "local",
"model": "gpt-5-nano",
"passes": 5,
"substrate": "openai",
"timestamp": "2026-06-04T14:25:50.218421+00:00",
"total_tests": 40,
"version": "3.0"
},
"pass_stats": {
"pass_1": {
"avg_model_latency_ms": 3548.8,
"blank_count": 38,
"max_model_latency_ms": 6532.0
},
"pass_2": {
"avg_model_latency_ms": 3795.7,
"blank_count": 38,
"max_model_latency_ms": 5359.0
},
"pass_3": {
"avg_model_latency_ms": 3861.7,
"blank_count": 38,
"max_model_latency_ms": 4765.0
},
"pass_4": {
"avg_model_latency_ms": 3794.5,
"blank_count": 38,
"max_model_latency_ms": 7187.0
},
"pass_5": {
"avg_model_latency_ms": 3809.7,
"blank_count": 39,
"max_model_latency_ms": 4593.0
}
},
"pass_verdict_vectors": {
"pass_1": {
"flapper": [
null,
null,
null,
null,
null,
null
],
"interpretive": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"judge": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"objective": [
true,
null,
null,
true,
null,
null,
null,
null,
null,
null,
null,
null
]
},
"pass_2": {
"flapper": [
null,
null,
null,
null,
null,
null
],
"interpretive": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"judge": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"objective": [
true,
null,
null,
true,
null,
null,
null,
null,
null,
null,
null,
null
]
},
"pass_3": {
"flapper": [
null,
null,
null,
null,
null,
null
],
"interpretive": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"judge": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"objective": [
true,
null,
null,
true,
null,
null,
null,
null,
null,
null,
null,
null
]
},
"pass_4": {
"flapper": [
null,
null,
null,
null,
null,
null
],
"interpretive": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"judge": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"objective": [
true,
null,
null,
true,
null,
null,
null,
null,
null,
null,
null,
null
]
},
"pass_5": {
"flapper": [
null,
null,
null,
null,
null,
null
],
"interpretive": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"judge": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
"objective": [
true,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
}
},
"stats": {
"judge_calls": 0,
"judge_errors": 0,
"judge_retries": 0,
"model_calls": 200,
"model_errors": 0,
"model_retries": 0
},
"verdict_logging": [
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 2359.0,
"pass": 1,
"response_len": 1,
"response_snippet": "4",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_001",
"ts": "2026-06-04T14:25:52.576664+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3250.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_002",
"ts": "2026-06-04T14:25:56.818184+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3453.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_003",
"ts": "2026-06-04T14:26:01.270184+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 2407.0,
"pass": 1,
"response_len": 79,
"response_snippet": "I\u2019m ChatGPT, a large language model by OpenAI, based on the GPT-4 architecture.",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_004",
"ts": "2026-06-04T14:26:04.684631+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4406.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_005",
"ts": "2026-06-04T14:26:10.091445+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3203.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_006",
"ts": "2026-06-04T14:26:14.298267+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3765.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_007",
"ts": "2026-06-04T14:26:19.067979+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3235.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_008",
"ts": "2026-06-04T14:26:23.305128+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3578.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_009",
"ts": "2026-06-04T14:26:27.879061+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3390.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_010",
"ts": "2026-06-04T14:26:32.269948+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3344.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_011",
"ts": "2026-06-04T14:26:36.612647+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3781.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_012",
"ts": "2026-06-04T14:26:41.403186+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4125.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_001",
"ts": "2026-06-04T14:26:46.525303+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3547.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_002",
"ts": "2026-06-04T14:26:51.073465+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4610.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_003",
"ts": "2026-06-04T14:26:56.680566+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3437.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_004",
"ts": "2026-06-04T14:27:01.125188+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3390.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_005",
"ts": "2026-06-04T14:27:05.529654+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_006",
"ts": "2026-06-04T14:27:10.208585+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3531.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_007",
"ts": "2026-06-04T14:27:14.743917+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4125.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_008",
"ts": "2026-06-04T14:27:19.875768+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3531.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_009",
"ts": "2026-06-04T14:27:24.412764+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3281.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_010",
"ts": "2026-06-04T14:27:28.690542+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3391.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_011",
"ts": "2026-06-04T14:27:33.088862+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3391.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_012",
"ts": "2026-06-04T14:27:37.473487+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3640.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_001",
"ts": "2026-06-04T14:27:42.122162+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3188.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_002",
"ts": "2026-06-04T14:27:46.309316+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3406.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_003",
"ts": "2026-06-04T14:27:50.719835+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 2890.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_004",
"ts": "2026-06-04T14:27:54.616463+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3469.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_005",
"ts": "2026-06-04T14:27:59.085009+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3250.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_006",
"ts": "2026-06-04T14:28:03.336114+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 5219.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_007",
"ts": "2026-06-04T14:28:09.554615+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3125.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_008",
"ts": "2026-06-04T14:28:13.675279+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3500.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_009",
"ts": "2026-06-04T14:28:18.175462+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3500.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_010",
"ts": "2026-06-04T14:28:22.684064+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3484.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_001",
"ts": "2026-06-04T14:28:27.163380+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3266.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_002",
"ts": "2026-06-04T14:28:31.437698+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 6532.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_003",
"ts": "2026-06-04T14:28:38.975231+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3406.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_004",
"ts": "2026-06-04T14:28:43.388609+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3047.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_005",
"ts": "2026-06-04T14:28:47.437028+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 2812.0,
"pass": 1,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_006",
"ts": "2026-06-04T14:28:51.247124+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 2156.0,
"pass": 2,
"response_len": 1,
"response_snippet": "4",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_001",
"ts": "2026-06-04T14:28:54.394661+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4891.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_002",
"ts": "2026-06-04T14:29:00.293516+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3313.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_003",
"ts": "2026-06-04T14:29:04.604896+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 2718.0,
"pass": 2,
"response_len": 89,
"response_snippet": "I am ChatGPT, an AI language model developed by OpenAI (based on the GPT-4 architecture).",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_004",
"ts": "2026-06-04T14:29:08.321369+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3735.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_005",
"ts": "2026-06-04T14:29:13.055702+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4015.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_006",
"ts": "2026-06-04T14:29:18.069261+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3453.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_007",
"ts": "2026-06-04T14:29:22.527729+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3157.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_008",
"ts": "2026-06-04T14:29:26.684902+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3125.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_009",
"ts": "2026-06-04T14:29:30.801868+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3297.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_010",
"ts": "2026-06-04T14:29:35.106796+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3515.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_011",
"ts": "2026-06-04T14:29:39.615996+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3438.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_012",
"ts": "2026-06-04T14:29:44.060514+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3297.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_001",
"ts": "2026-06-04T14:29:48.353549+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 5359.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_002",
"ts": "2026-06-04T14:29:54.714353+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3922.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_003",
"ts": "2026-06-04T14:29:59.631626+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4562.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_004",
"ts": "2026-06-04T14:30:05.197143+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4500.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_005",
"ts": "2026-06-04T14:30:10.703432+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4016.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_006",
"ts": "2026-06-04T14:30:15.728389+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3765.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_007",
"ts": "2026-06-04T14:30:20.491179+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4250.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_008",
"ts": "2026-06-04T14:30:25.744577+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4125.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_009",
"ts": "2026-06-04T14:30:30.875424+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3734.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_010",
"ts": "2026-06-04T14:30:35.614904+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4594.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_011",
"ts": "2026-06-04T14:30:41.217710+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3609.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_012",
"ts": "2026-06-04T14:30:45.825354+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3328.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_001",
"ts": "2026-06-04T14:30:50.157882+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3266.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_002",
"ts": "2026-06-04T14:30:54.430484+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3312.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_003",
"ts": "2026-06-04T14:30:58.741372+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3906.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_004",
"ts": "2026-06-04T14:31:03.644642+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3641.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_005",
"ts": "2026-06-04T14:31:08.295667+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4453.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_006",
"ts": "2026-06-04T14:31:13.747685+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3610.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_007",
"ts": "2026-06-04T14:31:18.353538+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3750.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_008",
"ts": "2026-06-04T14:31:23.110039+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 5031.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_009",
"ts": "2026-06-04T14:31:29.149839+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3563.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_010",
"ts": "2026-06-04T14:31:33.717568+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3734.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_001",
"ts": "2026-06-04T14:31:38.451608+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4094.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_002",
"ts": "2026-06-04T14:31:43.540657+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3813.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_003",
"ts": "2026-06-04T14:31:48.347072+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4093.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_004",
"ts": "2026-06-04T14:31:53.446150+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4000.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_005",
"ts": "2026-06-04T14:31:58.447506+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 2,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_006",
"ts": "2026-06-04T14:32:03.128541+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 3062.0,
"pass": 3,
"response_len": 118,
"response_snippet": "4\n\nIn base-10 arithmetic, 2 + 2 equals 4. If you\u2019re thinking of a different base or context, tell me and I can adjust.",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_001",
"ts": "2026-06-04T14:32:07.191712+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3953.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_002",
"ts": "2026-06-04T14:32:12.153523+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4266.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_003",
"ts": "2026-06-04T14:32:17.410257+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 3109.0,
"pass": 3,
"response_len": 88,
"response_snippet": "I\u2019m ChatGPT \u2014 an AI language model developed by OpenAI, based on the GPT-4 architecture.",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_004",
"ts": "2026-06-04T14:32:21.522935+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3985.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_005",
"ts": "2026-06-04T14:32:26.505094+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4437.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_006",
"ts": "2026-06-04T14:32:31.951188+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4125.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_007",
"ts": "2026-06-04T14:32:37.077430+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4016.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_008",
"ts": "2026-06-04T14:32:42.092240+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3766.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_009",
"ts": "2026-06-04T14:32:46.858308+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3750.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_010",
"ts": "2026-06-04T14:32:51.595941+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4765.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_011",
"ts": "2026-06-04T14:32:57.363440+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3625.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_012",
"ts": "2026-06-04T14:33:01.986346+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3641.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_001",
"ts": "2026-06-04T14:33:06.636552+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3765.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_002",
"ts": "2026-06-04T14:33:11.396443+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4750.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_003",
"ts": "2026-06-04T14:33:17.146547+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4110.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_004",
"ts": "2026-06-04T14:33:22.265812+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3469.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_005",
"ts": "2026-06-04T14:33:26.750900+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3953.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_006",
"ts": "2026-06-04T14:33:31.706270+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4578.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_007",
"ts": "2026-06-04T14:33:37.288687+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3859.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_008",
"ts": "2026-06-04T14:33:42.149106+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4047.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_009",
"ts": "2026-06-04T14:33:47.204425+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3984.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_010",
"ts": "2026-06-04T14:33:52.199100+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3735.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_011",
"ts": "2026-06-04T14:33:56.939000+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3875.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_012",
"ts": "2026-06-04T14:34:01.815383+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3735.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_001",
"ts": "2026-06-04T14:34:06.550303+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4031.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_002",
"ts": "2026-06-04T14:34:11.590156+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4094.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_003",
"ts": "2026-06-04T14:34:16.676703+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3828.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_004",
"ts": "2026-06-04T14:34:21.510727+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4156.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_005",
"ts": "2026-06-04T14:34:26.665895+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4266.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_006",
"ts": "2026-06-04T14:34:31.931046+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3547.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_007",
"ts": "2026-06-04T14:34:36.477160+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3968.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_008",
"ts": "2026-06-04T14:34:41.445158+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3516.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_009",
"ts": "2026-06-04T14:34:45.963742+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3375.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_010",
"ts": "2026-06-04T14:34:50.336995+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3625.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_001",
"ts": "2026-06-04T14:34:54.970301+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3281.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_002",
"ts": "2026-06-04T14:34:59.265715+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3594.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_003",
"ts": "2026-06-04T14:35:03.852255+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3390.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_004",
"ts": "2026-06-04T14:35:08.247607+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3719.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_005",
"ts": "2026-06-04T14:35:12.969642+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3718.0,
"pass": 3,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_006",
"ts": "2026-06-04T14:35:17.693326+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 3047.0,
"pass": 4,
"response_len": 95,
"response_snippet": "4\n\nIn standard base-10 arithmetic, 2 + 2 = 4. If you want it in another base, I can convert it.",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_001",
"ts": "2026-06-04T14:35:21.746186+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3594.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_002",
"ts": "2026-06-04T14:35:26.345028+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4046.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_003",
"ts": "2026-06-04T14:35:31.401971+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 3891.0,
"pass": 4,
"response_len": 73,
"response_snippet": "I\u2019m ChatGPT, an AI assistant powered by OpenAI. How can I help you today?",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_004",
"ts": "2026-06-04T14:35:36.291100+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3953.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_005",
"ts": "2026-06-04T14:35:41.236790+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4328.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_006",
"ts": "2026-06-04T14:35:46.567106+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3938.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_007",
"ts": "2026-06-04T14:35:51.509637+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3328.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_008",
"ts": "2026-06-04T14:35:55.841269+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3359.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_009",
"ts": "2026-06-04T14:36:00.201900+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3641.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_010",
"ts": "2026-06-04T14:36:04.837313+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3500.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_011",
"ts": "2026-06-04T14:36:09.344401+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3484.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_012",
"ts": "2026-06-04T14:36:13.833368+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3328.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_001",
"ts": "2026-06-04T14:36:18.171677+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3968.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_002",
"ts": "2026-06-04T14:36:23.144846+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3500.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_003",
"ts": "2026-06-04T14:36:27.643304+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3891.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_004",
"ts": "2026-06-04T14:36:32.537461+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3578.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_005",
"ts": "2026-06-04T14:36:37.110794+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3422.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_006",
"ts": "2026-06-04T14:36:41.547667+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3890.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_007",
"ts": "2026-06-04T14:36:46.439366+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 5485.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_008",
"ts": "2026-06-04T14:36:52.932189+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3750.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_009",
"ts": "2026-06-04T14:36:57.687913+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3703.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_010",
"ts": "2026-06-04T14:37:02.403195+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3485.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_011",
"ts": "2026-06-04T14:37:06.885150+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3422.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_012",
"ts": "2026-06-04T14:37:11.307038+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3750.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_001",
"ts": "2026-06-04T14:37:16.063972+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_002",
"ts": "2026-06-04T14:37:20.754825+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3687.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_003",
"ts": "2026-06-04T14:37:25.440720+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3297.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_004",
"ts": "2026-06-04T14:37:29.748371+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_005",
"ts": "2026-06-04T14:37:34.430759+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3453.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_006",
"ts": "2026-06-04T14:37:38.891999+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3594.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_007",
"ts": "2026-06-04T14:37:43.500283+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3609.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_008",
"ts": "2026-06-04T14:37:48.120097+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4844.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_009",
"ts": "2026-06-04T14:37:53.965954+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3437.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_010",
"ts": "2026-06-04T14:37:58.403361+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3469.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_001",
"ts": "2026-06-04T14:38:02.870593+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3500.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_002",
"ts": "2026-06-04T14:38:07.363206+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3438.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_003",
"ts": "2026-06-04T14:38:11.812525+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4094.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_004",
"ts": "2026-06-04T14:38:16.923978+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 7187.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_005",
"ts": "2026-06-04T14:38:25.124862+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3515.0,
"pass": 4,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_006",
"ts": "2026-06-04T14:38:29.651110+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "rule:rule-based",
"low_confidence": false,
"model_latency_ms": 1547.0,
"pass": 5,
"response_len": 1,
"response_snippet": "4",
"skip_reason": null,
"stable": true,
"test_id": "OBJ_001",
"ts": "2026-06-04T14:38:32.197887+00:00",
"verdict": true
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3938.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_002",
"ts": "2026-06-04T14:38:37.140448+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 4265.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_003",
"ts": "2026-06-04T14:38:42.401082+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": true,
"test_id": "OBJ_004",
"ts": "2026-06-04T14:38:47.094043+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3625.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_005",
"ts": "2026-06-04T14:38:51.725764+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3609.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_006",
"ts": "2026-06-04T14:38:56.336787+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3328.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_007",
"ts": "2026-06-04T14:39:00.660649+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3688.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_008",
"ts": "2026-06-04T14:39:05.356235+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3890.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_009",
"ts": "2026-06-04T14:39:10.251408+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3531.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_010",
"ts": "2026-06-04T14:39:14.796514+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3516.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_011",
"ts": "2026-06-04T14:39:19.305222+00:00",
"verdict": null
},
{
"category": "objective",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "skipped: empty response",
"low_confidence": false,
"model_latency_ms": 3968.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: empty response",
"stable": null,
"test_id": "OBJ_012",
"ts": "2026-06-04T14:39:24.282973+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4188.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_001",
"ts": "2026-06-04T14:39:29.476891+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4593.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_002",
"ts": "2026-06-04T14:39:35.079389+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3875.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_003",
"ts": "2026-06-04T14:39:39.956332+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3625.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_004",
"ts": "2026-06-04T14:39:44.589463+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4531.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_005",
"ts": "2026-06-04T14:39:50.125121+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3562.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_006",
"ts": "2026-06-04T14:39:54.702801+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4078.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_007",
"ts": "2026-06-04T14:39:59.797183+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4109.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_008",
"ts": "2026-06-04T14:40:04.914815+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3797.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_009",
"ts": "2026-06-04T14:40:09.707767+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3656.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_010",
"ts": "2026-06-04T14:40:14.370194+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4313.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_011",
"ts": "2026-06-04T14:40:19.682880+00:00",
"verdict": null
},
{
"category": "interpretive",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3609.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "INT_012",
"ts": "2026-06-04T14:40:24.284565+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3781.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_001",
"ts": "2026-06-04T14:40:29.077363+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4141.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_002",
"ts": "2026-06-04T14:40:34.220362+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3796.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_003",
"ts": "2026-06-04T14:40:39.027159+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3438.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_004",
"ts": "2026-06-04T14:40:43.455568+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3937.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_005",
"ts": "2026-06-04T14:40:48.394671+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3672.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_006",
"ts": "2026-06-04T14:40:53.071888+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4016.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_007",
"ts": "2026-06-04T14:40:58.092273+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4547.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_008",
"ts": "2026-06-04T14:41:03.630192+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3640.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_009",
"ts": "2026-06-04T14:41:08.280937+00:00",
"verdict": null
},
{
"category": "judge",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4531.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "JUDGE_010",
"ts": "2026-06-04T14:41:13.816935+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3672.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_001",
"ts": "2026-06-04T14:41:18.492465+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3719.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_002",
"ts": "2026-06-04T14:41:23.211555+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3531.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_003",
"ts": "2026-06-04T14:41:27.745786+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3563.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_004",
"ts": "2026-06-04T14:41:32.299147+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": true,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 4078.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_005",
"ts": "2026-06-04T14:41:37.376444+00:00",
"verdict": null
},
{
"category": "flapper",
"excluded_from_gamma": false,
"judge_latency_ms": 0.0,
"judge_raw": "",
"judge_reason": "",
"low_confidence": false,
"model_latency_ms": 3797.0,
"pass": 5,
"response_len": 0,
"response_snippet": "",
"skip_reason": "skipped: model response was empty",
"stable": null,
"test_id": "FLAP_006",
"ts": "2026-06-04T14:41:42.176898+00:00",
"verdict": null
}
],
"wobble_excluded_history": {
"flapper": 1,
"interpretive": 0,
"judge": 1,
"objective": 1
},
"wobble_excluded_tests": {
"flapper": 5,
"interpretive": 12,
"judge": 9,
"objective": 10
},
"wobble_metrics": {
"flapper": null,
"interpretive": null,
"judge": null,
"objective": 0.0,
"overall_weighted": 0.0
}
}