clawbench/tests/test_upload.py
2026-04-28 01:17:43 -07:00

64 lines
2.2 KiB
Python

import pytest
from clawbench.schemas import BenchmarkResult
from clawbench.upload import _json_column, _submission_shard_name, upload_result
def test_submission_shard_name_sanitizes_ids():
    """Check the shard file names produced for two raw submission ids.

    An id containing ``/`` and ``:`` is expected to come back with those
    characters replaced by ``-``, and an id made only of dots is expected
    to map to the ``submission.parquet`` name.
    """
    expected_shard_by_id = {
        "abc/def:ghi": "abc-def-ghi.parquet",
        "...": "submission.parquet",
    }
    for raw_id, shard_name in expected_shard_by_id.items():
        assert _submission_shard_name(raw_id) == shard_name
@pytest.mark.asyncio
async def test_upload_result_writes_append_only_submission_shard(monkeypatch):
    """Run ``upload_result`` against a fake Hub client and inspect the upload.

    ``huggingface_hub.HfApi`` is swapped for a recording fake and
    ``clawbench.upload.ensure_dataset_repo`` for a recording stub. The test
    then checks the returned URL, the repo-ensure call, the single uploaded
    shard's destination, and two JSON-encoded columns of the uploaded row.
    """
    upload_calls = []
    ensure_calls = []
    rows_seen = []

    class FakeApi:
        """Stand-in for ``HfApi`` that records uploads instead of sending them."""

        def __init__(self, token: str) -> None:
            self.token = token

        def upload_file(self, *, path_or_fileobj: str, path_in_repo: str, repo_id: str, repo_type: str) -> None:
            import pandas as pd

            upload_calls.append((path_or_fileobj, path_in_repo, repo_id, repo_type))
            # Read the parquet file during the call and keep its rows for the
            # assertions made after upload_result returns.
            frame = pd.read_parquet(path_or_fileobj)
            rows_seen.extend(frame.to_dict(orient="records"))

    def record_ensure(api, repo_id):
        ensure_calls.append((api.token, repo_id))

    monkeypatch.setattr("huggingface_hub.HfApi", FakeApi)
    monkeypatch.setattr("clawbench.upload.ensure_dataset_repo", record_ensure)

    result_fields = {
        "submission_id": "run/123",
        "model": "anthropic/claude-sonnet-4-6",
        "provider": "anthropic",
        "timestamp": "2026-04-28T00:00:00+00:00",
        "overall_score": 0.8,
        "overall_completion": 0.9,
        "overall_trajectory": 0.7,
        "overall_behavior": 0.8,
        "overall_ci_lower": 0.7,
        "overall_ci_upper": 0.9,
        "overall_pass_hat_k": 1.0,
    }
    result = BenchmarkResult(**result_fields)

    url = await upload_result(result, dataset_repo="openclaw/clawbench-results", token="hf_test")

    assert url == "https://huggingface.co/datasets/openclaw/clawbench-results"
    assert ensure_calls == [("hf_test", "openclaw/clawbench-results")]

    assert len(upload_calls) == 1
    local_path, *destination = upload_calls[0]
    assert local_path.endswith("run-123.parquet")
    assert destination == [
        "data/submissions/run-123.parquet",
        "openclaw/clawbench-results",
        "dataset",
    ]

    first_row = rows_seen[0]
    assert first_row["overall_delivery_outcome_counts"] == "{}"
    assert first_row["task_results"] == "[]"
def test_json_column_is_stable_and_compact():
    """Check that ``_json_column`` emits sorted keys with no extra whitespace."""
    unordered_mapping = {"b": 2, "a": 1}
    encoded = _json_column(unordered_mapping)
    assert encoded == '{"a":1,"b":2}'