# Snapshot of origin/main. Recent commit subjects:
#   fix(worker): harden runtime result writes
#   fix(client): clean pending rpc on send failure
#   test: cover environment verifier success paths
#   test: cover judge score gate propagation
#   fix(scoring): gate judge-weighted scores
#   fix(runtime): harden benchmark cache and task paths
#   fix: flag credential file access in dangerous shell patterns (#6)
#   fix: flag git push --force variants as dangerous shell commands (#5)
#   chore: add open-source contribution scaffolding (#3)
#   fix: strip quoted strings before checking for shell redirect operators (#2)
# File viewer metadata for the original file: 70 lines, 2.0 KiB, TOML.
# Core package metadata (PEP 621 `[project]` table).
[project]
name = "clawbench"
# PEP 440 development release leading up to 0.4.0.
version = "0.4.0.dev1"
description = "Rigorous benchmark for AI models as OpenClaw agents"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
# Every requirement declares both a lower and an upper version bound.
dependencies = [
    "websockets>=13.0,<15",
    "pydantic>=2.7,<3",
    "pyyaml>=6.0,<7",
    "datasets>=3.0,<4",
    "gradio>=5.0,<6",
    "httpx>=0.27,<1",
    "numpy>=1.26,<3",
    "rich>=13.0,<14",
    "click>=8.1,<9",
    # Runtime deps for the task completion verifier. The harness shells out
    # to `pytest -q` (with the pytest-asyncio plugin) inside per-task
    # workspaces as the execution check, so both packages must be installed
    # in the runtime container and `pytest` must be on PATH.
    "pytest>=8.0,<9",
    "pytest-asyncio>=0.24,<1",
]

# Optional extras, installed with `pip install .[<extra>]`.
[project.optional-dependencies]
dev = [
    # Kept as an alias for historical `pip install .[dev]` invocations.
    # pytest + pytest-asyncio are now in the base [dependencies] since the
    # benchmark itself runs pytest in task workspaces; they are repeated here
    # with identical bounds.
    "pytest>=8.0,<9",
    "pytest-asyncio>=0.24,<1",
    "pre-commit>=4.0,<5",
    "ruff>=0.9,<1",
]
hermes = [
    # Direct Git reference; permitted by `allow-direct-references = true`
    # under [tool.hatch.metadata].
    # NOTE(review): `@main` tracks a moving branch, so installs of this extra
    # are not reproducible. Consider pinning to a tag or commit SHA.
    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git@main",
]

# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/openclaw/clawbench"
Repository = "https://github.com/openclaw/clawbench"
# Quoted because the label contains a space, which a bare TOML key cannot.
"Bug Tracker" = "https://github.com/openclaw/clawbench/issues"

# Console entry points: the `clawbench` command invokes `main` from the
# `clawbench.cli` module.
[project.scripts]
clawbench = "clawbench.cli:main"

# PEP 517 build configuration: the package is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Wheel contents: the `clawbench` package plus the force-included paths below.
[tool.hatch.build.targets.wheel]
packages = ["clawbench"]

# Extra paths copied into the wheel, written as source path = path inside the
# wheel. Each entry maps to the same relative path. Keys stay quoted because
# they are file-system paths (two of them contain a `.`).
[tool.hatch.build.targets.wheel.force-include]
"tasks-public" = "tasks-public"
"tasks-domain" = "tasks-domain"
"profiles" = "profiles"
"baselines" = "baselines"
"CLAWBENCH_V0_4_SPEC.md" = "CLAWBENCH_V0_4_SPEC.md"
"PARTNER_TRACE_SPEC.md" = "PARTNER_TRACE_SPEC.md"

# Hatch rejects direct-reference requirements (`name @ url`) unless this is
# enabled; the `hermes` extra depends on a `git+https` reference.
[tool.hatch.metadata]
allow-direct-references = true

# pytest settings for this repository's test suite.
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async tests run without an explicit marker.
asyncio_mode = "auto"
# `-p no:opik` stops pytest from loading a plugin named `opik`.
addopts = ["-p", "no:opik"]
testpaths = ["tests"]

# Ruff settings; `target-version` matches `requires-python = ">=3.11"`.
[tool.ruff]
line-length = 100
target-version = "py311"

# Lint rule selection: pycodestyle error groups E4/E7/E9 plus Pyflakes (F).
[tool.ruff.lint]
select = ["E4", "E7", "E9", "F"]
# E402: module-level import not at top of file.
ignore = ["E402"]