# Packaging metadata and tool configuration for the clawbench project.
# Core package metadata.
[project]
name = "clawbench"
version = "0.4.0.dev1"
description = "Rigorous benchmark for AI models as OpenClaw agents"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
dependencies = [
    # General runtime libraries, kept in alphabetical order.
    "click>=8.1,<9",
    "datasets>=3.0,<4",
    "gradio>=6.7.0,<7",
    "httpx>=0.27,<1",
    "numpy>=1.26,<3",
    "pillow>=12.2.0,<13",
    "pydantic>=2.7,<3",
    "pyyaml>=6.0,<7",
    "rich>=13.0,<14",
    "websockets>=13.0,<15",
    # Needed at runtime by the task completion verifier: the harness shells
    # out to `pytest -q` / `pytest-asyncio` inside per-task workspaces as its
    # execution check, so the container must have both available in PATH.
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
]

# Optional extras, installable as `pip install .[<extra>]`.
[project.optional-dependencies]
dev = [
    # Developer tooling.
    "pre-commit>=4.0,<5",
    "ruff>=0.9,<1",
    # pytest + pytest-asyncio already live in the base [dependencies] because
    # the benchmark itself runs pytest in task workspaces; they are repeated
    # here only so historical `pip install .[dev]` invocations keep working.
    "pytest>=9.0.3,<10",
    "pytest-asyncio>=1,<2",
]
mlflow = ["mlflow>=2.10,<3"]
hermes = [
    # NOTE(review): this follows the moving `main` branch, so two installs may
    # resolve to different code; consider pinning to a tag or commit SHA.
    "hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git@main",
]

# Project links. "Bug Tracker" must stay quoted because the key contains a space.
[project.urls]
Homepage = "https://github.com/openclaw/clawbench"
Repository = "https://github.com/openclaw/clawbench"
"Bug Tracker" = "https://github.com/openclaw/clawbench/issues"

# Console entry point: the `clawbench` command maps to `main` in `clawbench.cli`.
[project.scripts]
clawbench = "clawbench.cli:main"

# Build backend: the package is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Wheel contents: the importable package plus extra data shipped alongside it.
[tool.hatch.build.targets.wheel]
packages = ["clawbench"]

# Extra files and directories copied into the wheel. Each entry maps a source
# path in the repository (left) to its destination path inside the wheel
# (right); here every path is shipped under its original name.
[tool.hatch.build.targets.wheel.force-include]
tasks-public = "tasks-public"
tasks-domain = "tasks-domain"
profiles = "profiles"
baselines = "baselines"
"CLAWBENCH_V0_4_SPEC.md" = "CLAWBENCH_V0_4_SPEC.md"
"PARTNER_TRACE_SPEC.md" = "PARTNER_TRACE_SPEC.md"

# Permit direct-reference requirements (`name @ url`) in the project metadata;
# the `hermes` extra depends on a `git+https` URL and relies on this flag.
[tool.hatch.metadata]
allow-direct-references = true

# Pytest settings for this repository's own test suite.
[tool.pytest.ini_options]
# Tests are collected from the `tests` directory.
testpaths = ["tests"]
# pytest-asyncio mode.
asyncio_mode = "auto"
# `-p no:opik` blocks the pytest plugin named `opik` from loading.
addopts = ["-p", "no:opik"]

# Ruff: target Python 3.11 (matching `requires-python`) with 100-column lines.
[tool.ruff]
target-version = "py311"
line-length = 100

# Ruff lint rules: enable the E4, E7, E9 and F rule groups, then switch off
# E402 (module-level import not at top of file).
[tool.ruff.lint]
select = ["E4", "E7", "E9", "F"]
ignore = ["E402"]