From c72e41687dc9c67b384a9175120a54157a49960d Mon Sep 17 00:00:00 2001 From: HeYan <105071431+MiltonHeYan@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:16:52 -0700 Subject: [PATCH] chore: add open-source contribution scaffolding (#3) * chore: add open-source contribution scaffolding New files --------- LICENSE The README already references this file and the pyproject.toml already declares `license = "MIT"`, but no actual LICENSE file existed in the repo. The badge link was pointing at a 404. CONTRIBUTING.md Setup instructions, guidance on which contributions are welcome (bug fixes, new tasks, scoring changes, docs), branch naming convention, commit style, and a note on adding new tasks with deterministic completion checks. .github/ISSUE_TEMPLATE/bug_report.md .github/ISSUE_TEMPLATE/feature_request.md Structured templates so bug reports arrive with reproduction steps and environment info, and feature requests arrive with motivation and alternatives considered. .github/PULL_REQUEST_TEMPLATE.md Lightweight checklist (what / why / changes / tests) that matches the style of the two bug-fix PRs already merged. pyproject.toml Added [project.urls] with Homepage, Repository, and Bug Tracker so the links appear correctly on PyPI if the package is ever published there. 
* docs: align contribution scaffolding --------- Co-authored-by: Vincent Koc --- .github/ISSUE_TEMPLATE/bug_report.md | 31 ++++++ .github/ISSUE_TEMPLATE/feature_request.md | 21 ++++ .github/PULL_REQUEST_TEMPLATE.md | 18 +++ CONTRIBUTING.md | 127 ++++++++++++++++++++++ pyproject.toml | 5 + 5 files changed, 202 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 CONTRIBUTING.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..f8349cb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,31 @@ +--- +name: Bug report +about: Something is broken or producing wrong results +labels: bug +--- + +## What happened + + + +## Expected behaviour + + + +## Steps to reproduce + +```bash +# Minimal command / code snippet that triggers the bug +``` + +## Relevant output + +``` +# Full error message, stack trace, or unexpected scoring output +``` + +## Environment + +- Python version: +- OS: +- ClawBench version / commit: diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..b04e915 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,21 @@ +--- +name: Feature request +about: Suggest a new task, scoring improvement, or other enhancement +labels: enhancement +--- + +## Summary + + + +## Motivation + + + +## Proposed approach + + + +## Alternatives considered + + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..0bc3776 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +## What does this PR do? + + + +## Why? 
+ + + +## Changes + + + +## Tests + + + +- [ ] `python -m pytest -q` passes locally +- [ ] `python -m ruff check clawbench app.py scripts tests` passes locally, or the change is docs-only diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..a0f5ae8 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,127 @@ +# Contributing to ClawBench + +Thank you for your interest in contributing. This document explains how to get +set up, what kinds of contributions are welcome, and how the review process +works. + +--- + +## Getting started + +**Requirements:** Python 3.11+, Docker (for full end-to-end runs). + +```bash +git clone https://github.com/openclaw/clawbench.git +cd clawbench +python -m venv .venv && source .venv/bin/activate +python -m pip install -e ".[dev]" +``` + +Run the test suite to confirm everything is working: + +```bash +python -m pytest -q +python -m ruff check clawbench app.py scripts tests +``` + +The full local suite should pass before you make any changes. + +--- + +## What we welcome + +| Type | Notes | +|------|-------| +| **Bug fixes** | Include a test that reproduces the bug before the fix | +| **New tasks** | See [Adding tasks](#adding-tasks) below | +| **Scoring improvements** | Changes to `trajectory.py`, `scorer.py`, or `judge.py` must include updated tests and a clear rationale | +| **Documentation** | Fixes to README, spec docs, or inline comments | +| **Tooling / CI** | Workflow improvements, linting, dependency updates | + +We are unlikely to merge: +- Large architectural rewrites without prior discussion in an issue +- New dependencies without justification +- Changes that reduce test coverage + +--- + +## Making a change + +1. **Open an issue first** for anything non-trivial. This lets us align on + approach before you invest time writing code. + +2. **Create a branch** from `main`: + ```bash + git checkout -b fix/short-description + ``` + Branch names: `fix/`, `feat/`, `docs/`, `chore/` prefixes. + +3. 
**Write tests.** Bug fixes must include a test that fails before the fix + and passes after. New features must include tests covering the new + behaviour. + +4. **Run the test suite:** + ```bash + python -m pytest -q + ``` + +5. **Open a pull request** against `main`. Fill in the PR template. + +--- + +## Adding tasks + +Public tasks live in `tasks-public/tier{1-5}/` as YAML files. Domain and +partner tasks live under `tasks-domain/`. Each task needs: + +- A unique `id` and descriptive `name` +- The correct `tier` (1 = simple single-tool, 5 = adversarial/multi-step) +- `completion` checks — at least one deterministic verifier (`execution_checks`, + `file_equality`, or a gateway assertion) +- `trajectory` expectations that reflect how a competent agent should approach + the task +- A `judge` rubric for semantic tasks + +Before submitting a new task, run it against at least one agent to verify the +completion checks fire correctly. + +--- + +## Commit style + +``` +type: short imperative summary (≤72 chars) + +Optional longer explanation. Wrap at 72 chars. Explain *why*, not what — +the diff shows what changed. +``` + +Types: `fix`, `feat`, `docs`, `test`, `chore`, `refactor`. + +--- + +## Code style + +The project uses Ruff and pre-commit for local guardrails. Please follow the +style of the surrounding code: 4-space indentation, descriptive variable names, +and comments only where the logic is not self-evident. + +```bash +python -m ruff check clawbench app.py scripts tests +pre-commit run --files <changed-files> +``` + +--- + +## Reporting bugs + +Use the [bug report template](.github/ISSUE_TEMPLATE/bug_report.md). Include: +- The command you ran +- The full error output or unexpected behaviour +- The Python version and OS + +--- + +## Questions + +Open an issue for questions that are not bug reports or feature requests. 
diff --git a/pyproject.toml b/pyproject.toml index 080d500..9afa977 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,11 @@ dev = [ "ruff>=0.9,<1", ] +[project.urls] +Homepage = "https://github.com/openclaw/clawbench" +Repository = "https://github.com/openclaw/clawbench" +"Bug Tracker" = "https://github.com/openclaw/clawbench/issues" + [project.scripts] clawbench = "clawbench.cli:main"