fix: strip quoted strings before checking for shell redirect operators (#2)
is_mutating_shell_command scanned the raw command string against
MUTATING_SHELL_PATTERNS, which includes the bare pattern r">". This
caused any command with a > character inside a quoted argument to be
classified as a file-writing mutation:
grep "count > 5" logs.txt → ("edit", True) # wrong
python -c "print(1 > 0)" → ("edit", True) # wrong
In classify_shell_command, a mutating=True result suppresses both the
READ_ONLY and EXECUTION branches, so these read-only commands fell
through to `return "edit", True` instead of "search" or "execute".
Fix: strip the contents of quoted strings (both double and single
quotes) before scanning for mutation patterns. The redirect operators
that actually matter — `>`, `>>`, `2>`, etc. — always appear outside
quotes in real shell commands, so stripping quote bodies removes the
false positives while preserving all true redirects.
Tests added: read-only commands containing > inside quotes must not be
flagged, and real redirect commands must still be detected.
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
parent
0625ab7159
commit
d21648ad3d
@ -352,8 +352,19 @@ def _normalize_target(value: str) -> str:
|
||||
return normalized.lower()
|
||||
|
||||
|
||||
def _strip_quoted_strings(command: str) -> str:
|
||||
"""Remove the contents of quoted strings so that operators inside quotes
|
||||
(e.g. the ``>`` in ``grep "x > 5" file``) are not mistaken for shell
|
||||
redirect operators when scanning for mutation patterns.
|
||||
"""
|
||||
result = re.sub(r'"[^"]*"', '""', command)
|
||||
result = re.sub(r"'[^']*'", "''", result)
|
||||
return result
|
||||
|
||||
|
||||
def is_mutating_shell_command(command: str) -> bool:
|
||||
return any(re.search(pattern, command, re.IGNORECASE) for pattern in MUTATING_SHELL_PATTERNS)
|
||||
stripped = _strip_quoted_strings(command)
|
||||
return any(re.search(pattern, stripped, re.IGNORECASE) for pattern in MUTATING_SHELL_PATTERNS)
|
||||
|
||||
|
||||
def looks_like_error(text: str) -> bool:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
from clawbench.schemas import ToolCall, TrajectoryExpectations, Transcript, TranscriptMessage
|
||||
from clawbench.trajectory import classify_tool_call, evaluate_trajectory
|
||||
from clawbench.trajectory import classify_shell_command, classify_tool_call, evaluate_trajectory
|
||||
|
||||
|
||||
def test_trajectory_rewards_read_before_write_and_self_verification():
|
||||
@ -159,6 +159,34 @@ def test_str_replace_mutation_is_detected_in_trajectory():
|
||||
assert result.read_before_write_ratio == 1.0
|
||||
|
||||
|
||||
def test_shell_redirect_vs_quoted_operator():
|
||||
# The `>` character inside a quoted grep/python argument must NOT be
|
||||
# treated as a shell redirect. Before the fix, MUTATING_SHELL_PATTERNS
|
||||
# contained a bare r">" which matched any `>` in the command string,
|
||||
# causing read-only commands like `grep "x > 0"` to be classified as
|
||||
# ("edit", True) instead of ("search", False).
|
||||
read_only_cases = [
|
||||
'grep "count > 5" logs.txt',
|
||||
"grep '>' file.txt",
|
||||
'python -c "print(1 > 0)"',
|
||||
"awk '{if ($1 > 10) print}' data.txt",
|
||||
]
|
||||
for cmd in read_only_cases:
|
||||
family, mutating = classify_shell_command(cmd)
|
||||
assert not mutating, f"falsely flagged as mutating: {cmd!r}"
|
||||
|
||||
# Real redirects must still be detected.
|
||||
mutating_cases = [
|
||||
"echo hello > output.txt",
|
||||
"echo hello >> output.txt",
|
||||
"cat file.txt > copy.txt",
|
||||
"sed -i 's/a/b/' file",
|
||||
]
|
||||
for cmd in mutating_cases:
|
||||
_, mutating = classify_shell_command(cmd)
|
||||
assert mutating, f"redirect not detected: {cmd!r}"
|
||||
|
||||
|
||||
def test_find_replace_mutation_is_not_misclassified_as_search():
|
||||
transcript = Transcript(
|
||||
messages=[
|
||||
|
||||
Loading…
Reference in New Issue
Block a user