[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "clawbench"
version = "0.4.0.dev1"
description = "Rigorous benchmark for AI models as OpenClaw agents"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
dependencies = [
    "websockets>=13.0,<15",
    "pydantic>=2.7,<3",
    "pyyaml>=6.0,<7",
    "datasets>=3.0,<4",
    "gradio>=6.7.0,<7",
    "pillow>=12.2.0,<13",
    "httpx>=0.27,<1",
    "numpy>=1.26,<3",
    "rich>=13.0,<14",
    "click>=8.1,<9",
    # pytest and pytest-asyncio are runtime (not dev-only) requirements:
    # the task completion verifier shells out to `pytest -q` inside each
    # per-task workspace as its execution check, so the container must
    # have pytest available on PATH.
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
]

[project.optional-dependencies]
# `dev` is retained so that historical `pip install .[dev]` invocations keep
# working. Its pytest / pytest-asyncio entries duplicate the base
# dependencies above, where they now live because the benchmark itself runs
# pytest in task workspaces.
dev = [
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
    "pre-commit>=4.0,<5",
    "ruff>=0.9,<1",
]
mlflow = [
    "mlflow>=2.10,<3",
]
# NOTE(review): `@main` looks like a moving branch ref, so installs of this
# extra are not reproducible — consider pinning to a tag or commit.
hermes = [
    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git@main",
]

[project.urls]
Homepage = "https://github.com/openclaw/clawbench"
Repository = "https://github.com/openclaw/clawbench"
"Bug Tracker" = "https://github.com/openclaw/clawbench/issues"

[project.scripts]
clawbench = "clawbench.cli:main"

[tool.hatch.metadata]
# Needed for the direct `git+https` reference used by the `hermes` extra.
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["clawbench"]

# Non-package directories and spec documents shipped inside the wheel.
# Each entry maps a source path to the identical path in the wheel.
[tool.hatch.build.targets.wheel.force-include]
"tasks-public" = "tasks-public"
"tasks-domain" = "tasks-domain"
"profiles" = "profiles"
"baselines" = "baselines"
"CLAWBENCH_V0_4_SPEC.md" = "CLAWBENCH_V0_4_SPEC.md"
"PARTNER_TRACE_SPEC.md" = "PARTNER_TRACE_SPEC.md"

[tool.pytest.ini_options]
asyncio_mode = "auto"
addopts = ["-p", "no:opik"]
testpaths = ["tests"]

[tool.ruff]
line-length = 100
target-version = "py311"

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F"]
ignore = ["E402"]