[project]
name = "clawbench"
version = "0.4.0.dev1"
description = "Rigorous benchmark for AI models as OpenClaw agents"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
# Runtime requirements, sorted by name. Every entry has both a lower bound
# and an exclusive upper cap.
dependencies = [
    "click>=8.1,<9",
    "datasets>=3.0,<4",
    "gradio>=6.7.0,<7",
    "httpx>=0.27,<1",
    "numpy>=1.26,<3",
    "pillow>=12.2.0,<13",
    "pydantic>=2.7,<3",
    # pytest and pytest-asyncio are runtime requirements, not test-only ones:
    # the task completion verifier shells out to `pytest -q` inside per-task
    # workspaces as its execution check, so the `pytest` command has to be on
    # PATH in the container. pytest-asyncio is a pytest plugin rather than a
    # command, so it has to be installed in that same environment.
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
    "pyyaml>=6.0,<7",
    "rich>=13.0,<14",
    "websockets>=13.0,<15",
]

[project.optional-dependencies]
# Contributor tooling. pytest and pytest-asyncio already ship with the base
# install (the benchmark itself runs pytest in task workspaces); they are
# repeated here with the same pins so that historical `pip install .[dev]`
# invocations keep working unchanged.
dev = [
    "pre-commit>=4.0,<5",
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
    "ruff>=0.9,<1",
]
mlflow = ["mlflow>=2.10,<3"]
# Direct Git reference; Hatchling accepts it only because
# `allow-direct-references` is enabled under [tool.hatch.metadata].
# NOTE(review): this follows the moving `main` branch, so installs are not
# reproducible -- consider pinning a tag or commit.
hermes = [
    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git@main",
]

# Labelled links published in the package metadata. The quoted key is needed
# because "Bug Tracker" contains a space, which a bare TOML key cannot hold.
[project.urls]
Homepage = "https://github.com/openclaw/clawbench"
Repository = "https://github.com/openclaw/clawbench"
"Bug Tracker" = "https://github.com/openclaw/clawbench/issues"

# Console entry point: installing the package creates a `clawbench` command
# that calls `main` from the `clawbench.cli` module.
[project.scripts]
clawbench = "clawbench.cli:main"

# Build backend declaration. Hatchling is requested without a version bound,
# so whichever release the build frontend resolves at build time is used.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["clawbench"]

# Extra files and directories shipped in the wheel alongside the `clawbench`
# package. The key is the path in the source tree and the value is the path
# inside the wheel; each entry keeps its original name at the wheel root.
# Keys containing a dot must stay quoted, otherwise TOML would read them as
# nested tables.
[tool.hatch.build.targets.wheel.force-include]
tasks-public = "tasks-public"
tasks-domain = "tasks-domain"
profiles = "profiles"
baselines = "baselines"
"CLAWBENCH_V0_4_SPEC.md" = "CLAWBENCH_V0_4_SPEC.md"
"PARTNER_TRACE_SPEC.md" = "PARTNER_TRACE_SPEC.md"

# Hatchling rejects direct references (`name @ url`) in dependency lists
# unless this flag is set. The `hermes` extra uses a Git URL, so it is needed.
[tool.hatch.metadata]
allow-direct-references = true

# pytest settings for this repository's own test suite.
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async test functions are collected and run as
# asyncio tests without needing an explicit marker.
asyncio_mode = "auto"
# `-p no:opik` prevents pytest from loading a plugin named `opik`.
# NOTE(review): presumably added because an installed opik package registers
# a pytest plugin that interferes with these tests -- confirm.
addopts = ["-p", "no:opik"]
# Test collection is limited to the `tests` directory.
testpaths = ["tests"]

# Ruff settings. `target-version` matches the `>=3.11` floor declared in
# `requires-python` under [project].
[tool.ruff]
line-length = 100
target-version = "py311"

[tool.ruff.lint]
# Enabled rule groups: the pycodestyle error subsets E4 (imports), E7
# (statements) and E9 (runtime/syntax errors), plus all Pyflakes rules (F).
select = ["E4", "E7", "E9", "F"]
# E402 (module-level import not at top of file) is switched off project-wide.
ignore = ["E402"]