clawbench/pyproject.toml
Vincent Koc 82eaadbc61
Merge remote-tracking branch 'origin/main' into pr17-nonrewrite
* origin/main:
  fix(worker): harden runtime result writes
  fix(client): clean pending rpc on send failure
  test: cover environment verifier success paths
  test: cover judge score gate propagation
  fix(scoring): gate judge-weighted scores
  fix(runtime): harden benchmark cache and task paths
  fix: flag credential file access in dangerous shell patterns (#6)
  fix: flag git push --force variants as dangerous shell commands (#5)
  chore: add open-source contribution scaffolding (#3)
  fix: strip quoted strings before checking for shell redirect operators (#2)
2026-04-29 13:52:41 -07:00

70 lines
2.0 KiB
TOML

[project]
name = "clawbench"
version = "0.4.0.dev1"
description = "Rigorous benchmark for AI models as OpenClaw agents"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
dependencies = [
    "websockets>=13.0,<15",
    "pydantic>=2.7,<3",
    "pyyaml>=6.0,<7",
    "datasets>=3.0,<4",
    "gradio>=5.0,<6",
    "httpx>=0.27,<1",
    "numpy>=1.26,<3",
    "rich>=13.0,<14",
    "click>=8.1,<9",
    # pytest and pytest-asyncio are runtime requirements, not just test
    # tooling: the task completion verifier shells out to `pytest -q` inside
    # each per-task workspace as its execution check, so pytest has to be on
    # PATH in the container.
    "pytest>=8.0,<9",
    "pytest-asyncio>=0.24,<1",
]
[project.optional-dependencies]
# Retained so historical `pip install .[dev]` invocations keep working. The
# pytest and pytest-asyncio pins repeat the base `dependencies` list, where
# they now live because the benchmark itself runs pytest in task workspaces.
dev = [
    "pytest>=8.0,<9",
    "pytest-asyncio>=0.24,<1",
    "pre-commit>=4.0,<5",
    "ruff>=0.9,<1",
]
# NOTE(review): this direct reference follows the moving `main` branch, so two
# installs at different times may resolve to different code; consider pinning
# a tag or commit if reproducibility matters.
hermes = [
    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git@main",
]
# Project links published in the package metadata. "Bug Tracker" must stay
# quoted because the key contains a space.
[project.urls]
Homepage = "https://github.com/openclaw/clawbench"
Repository = "https://github.com/openclaw/clawbench"
"Bug Tracker" = "https://github.com/openclaw/clawbench/issues"
# Console entry point: installing the package provides a `clawbench` command
# that invokes `main` from the `clawbench.cli` module.
[project.scripts]
clawbench = "clawbench.cli:main"
# PEP 517 build configuration: sdists and wheels are built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["clawbench"]

# Extra files and directories shipped in the wheel alongside the package.
# Each entry maps a source path (left) to its destination path inside the
# wheel (right); here every item keeps its original relative path.
[tool.hatch.build.targets.wheel.force-include]
tasks-public = "tasks-public"
tasks-domain = "tasks-domain"
profiles = "profiles"
baselines = "baselines"
"CLAWBENCH_V0_4_SPEC.md" = "CLAWBENCH_V0_4_SPEC.md"
"PARTNER_TRACE_SPEC.md" = "PARTNER_TRACE_SPEC.md"
[tool.hatch.metadata]
# Hatchling rejects direct-reference requirements (`name @ url`) unless this
# is enabled; the `hermes` extra depends on one (a git URL).
allow-direct-references = true
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async tests and fixtures are handled without
# needing explicit asyncio markers.
asyncio_mode = "auto"
# `-p no:opik` prevents pytest from loading the plugin named "opik".
addopts = ["-p", "no:opik"]
# Directory searched for tests when pytest is run without path arguments.
testpaths = ["tests"]
[tool.ruff]
line-length = 100
# Matches the `requires-python = ">=3.11"` floor declared under [project].
target-version = "py311"
[tool.ruff.lint]
# pycodestyle error subsets E4, E7 and E9 plus all Pyflakes (F) rules.
select = ["E4", "E7", "E9", "F"]
# E402: module-level import not at top of file.
ignore = ["E402"]