fix: strip quoted strings before checking for shell redirect operators (#2)

is_mutating_shell_command scanned the raw command string against MUTATING_SHELL_PATTERNS, which includes the bare pattern r">". This caused any command with a > character inside a quoted argument to be classified as a file-writing mutation: grep "count > 5" logs.txt → ("edit", True) # wrong python -c "print(1 > 0)" → ("edit", True) # wrong In classify_shell_command, a mutating=True result suppresses both the READ_ONLY and EXECUTION branches, so these read-only commands fell through to `return "edit", True` instead of "search" or "execute". Fix: strip the contents of quoted strings (both double and single quotes) before scanning for mutation patterns. The redirect operators that actually matter — `>`, `>>`, `2>`, etc. — always appear outside quotes in real shell commands, so stripping quote bodies removes the false positives while preserving all true redirects. Tests added: read-only commands containing > inside quotes must not be flagged, and real redirect commands must still be detected. Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
2026-04-28 13:16:42 -07:00 · 2026-04-28 13:16:42 -07:00 · d21648ad3d
commit d21648ad3d
parent 0625ab7159
2 changed files with 41 additions and 2 deletions
--- a/clawbench/trajectory.py
+++ b/clawbench/trajectory.py
@ -352,8 +352,19 @@ def _normalize_target(value: str) -> str:
    return normalized.lower()


+def _strip_quoted_strings(command: str) -> str:
+    """Remove the contents of quoted strings so that operators inside quotes
+    (e.g. the ``>`` in ``grep "x > 5" file``) are not mistaken for shell
+    redirect operators when scanning for mutation patterns.
+    """
+    result = re.sub(r'"[^"]*"', '""', command)
+    result = re.sub(r"'[^']*'", "''", result)
+    return result
+
+
 def is_mutating_shell_command(command: str) -> bool:
-    return any(re.search(pattern, command, re.IGNORECASE) for pattern in MUTATING_SHELL_PATTERNS)
+    stripped = _strip_quoted_strings(command)
+    return any(re.search(pattern, stripped, re.IGNORECASE) for pattern in MUTATING_SHELL_PATTERNS)


 def looks_like_error(text: str) -> bool:
--- a/tests/test_trajectory.py
+++ b/tests/test_trajectory.py
@ -1,5 +1,5 @@
 from clawbench.schemas import ToolCall, TrajectoryExpectations, Transcript, TranscriptMessage
-from clawbench.trajectory import classify_tool_call, evaluate_trajectory
+from clawbench.trajectory import classify_shell_command, classify_tool_call, evaluate_trajectory


 def test_trajectory_rewards_read_before_write_and_self_verification():
@ -159,6 +159,34 @@ def test_str_replace_mutation_is_detected_in_trajectory():
    assert result.read_before_write_ratio == 1.0


+def test_shell_redirect_vs_quoted_operator():
+    # The `>` character inside a quoted grep/python argument must NOT be
+    # treated as a shell redirect. Before the fix, MUTATING_SHELL_PATTERNS
+    # contained a bare r">" which matched any `>` in the command string,
+    # causing read-only commands like `grep "x > 0"` to be classified as
+    # ("edit", True) instead of ("search", False).
+    read_only_cases = [
+        'grep "count > 5" logs.txt',
+        "grep '>' file.txt",
+        'python -c "print(1 > 0)"',
+        "awk '{if ($1 > 10) print}' data.txt",
+    ]
+    for cmd in read_only_cases:
+        family, mutating = classify_shell_command(cmd)
+        assert not mutating, f"falsely flagged as mutating: {cmd!r}"
+
+    # Real redirects must still be detected.
+    mutating_cases = [
+        "echo hello > output.txt",
+        "echo hello >> output.txt",
+        "cat file.txt > copy.txt",
+        "sed -i 's/a/b/' file",
+    ]
+    for cmd in mutating_cases:
+        _, mutating = classify_shell_command(cmd)
+        assert mutating, f"redirect not detected: {cmd!r}"
+
+
 def test_find_replace_mutation_is_not_misclassified_as_search():
    transcript = Transcript(
        messages=[