fix(tests): bare pytest flags pass through run_tests.sh without a '--' separator (#54008)

The parallel runner only forwarded pytest args after a literal '--', so a bare 'scripts/run_tests.sh tests/foo.py -q' (or -v/-x/-k/--tb=long) errored out with 'unrecognized arguments'. This contradicted the docstring's promise that common pytest flags pass through, and forced a retry on every run that used pytest muscle-memory. Now any token starting with '-' that isn't one of the runner's own options (-j/--jobs, --paths, --slice, --file-timeout, --generate-slices, --files, --include-integration) is routed to each per-file pytest invocation automatically. Value-taking flags given space-separated (-k expr, -m mark, -p plugin, -o name=val, etc.) keep their value instead of having it stolen by positional-path discovery. The explicit '--' separator still works and stacks with bare flags. - scripts/run_tests_parallel.py: argv splitter routes bare unknown flags to pytest; value-flag lookahead; updated docstring. - scripts/run_tests.sh: usage comment reflects bare-flag passthrough. - tests/test_run_tests_parallel.py: 4 behavior-contract tests (bare -q runs, -k keeps its value/filters, '--' still works, positional path stays a root).
2026-06-27 22:43:26 -07:00 · 2026-06-27 22:43:26 -07:00 · 2523917680
commit 2523917680
parent c918d42d88
3 changed files with 164 additions and 13 deletions
--- a/scripts/run_tests.sh
+++ b/scripts/run_tests.sh
@ -19,12 +19,17 @@
 #   scripts/run_tests.sh tests/agent/               # discover only here
 #   scripts/run_tests.sh tests/agent/ tests/acp/    # multiple roots
 #   scripts/run_tests.sh tests/foo.py               # single file
-#   scripts/run_tests.sh tests/foo.py -- --tb=long  # path + pytest args
-#   scripts/run_tests.sh -- -v --tb=long            # pytest args only
+#   scripts/run_tests.sh tests/foo.py -q            # path + bare pytest flag
+#   scripts/run_tests.sh tests/foo.py -v --tb=long  # bare flags "just work"
+#   scripts/run_tests.sh -k 'pattern'               # value flags pass through too
+#   scripts/run_tests.sh tests/foo.py -- --tb=long  # explicit '--' still works
 #
-# Everything after a literal '--' is passed through to each per-file
-# pytest invocation. Positional path arguments before '--' override
-# the default discovery root (tests/).
+# Bare pytest flags (anything starting with '-' that isn't one of this
+# runner's own options: -j/--jobs, --paths, --slice, --file-timeout, etc.)
+# are forwarded to each per-file pytest invocation automatically — no '--'
+# separator required. The explicit '--' form still works and stacks with
+# bare flags. Positional path arguments override the default discovery
+# root (tests/).

 set -euo pipefail

--- a/scripts/run_tests_parallel.py
+++ b/scripts/run_tests_parallel.py
@ -25,8 +25,10 @@ Why drop xdist entirely?
 Usage:
    python scripts/run_tests_parallel.py [pytest_args...]

-    Common pytest args pass through (e.g. ``-v``, ``-x``, ``--tb=long``,
-    ``-k 'pattern'``, ``--lf``).
+    Common pytest args pass through to each per-file pytest invocation
+    (e.g. ``-q``, ``-v``, ``-x``, ``--tb=long``, ``-k 'pattern'``, ``--lf``)
+    with no special separator — a bare ``-q`` "just works". Anything after
+    a literal ``--`` is also passed through, and stacks with bare flags.

 Environment:
    HERMES_TEST_WORKERS  Override worker count (default: os.cpu_count())
@ -656,17 +658,69 @@ def main() -> int:
            "separator is passed through to each per-file pytest invocation."
        ),
    )
-    # Manually split argv on '--' so positional paths and pytest passthrough
-    # args don't fight over each other. argparse's nargs="*" positional is
-    # greedy and will swallow everything after '--' including the pytest
-    # flags, defeating the convention.
+    # Split argv into "our flags + positional paths" vs "pytest passthrough".
+    #
+    # Two ways to pass args through to the per-file pytest invocation:
+    #   1. Explicit ``--`` separator: everything after it goes to pytest.
+    #   2. Bare pytest flags anywhere before ``--``: any token starting with
+    #      ``-`` that isn't one of OUR options is routed to pytest, so a bare
+    #      ``-q`` / ``-v`` / ``-x`` / ``--tb=long`` / ``-k expr`` "just works"
+    #      without the developer remembering the ``--``. This matches the
+    #      docstring's promise and pytest muscle-memory.
+    #
+    # The subtlety bare-flag routing must handle: value-taking pytest flags
+    # given in space-separated form (``-k expr``, ``-m mark``, ``-p plugin``,
+    # ``-o name=val``). Naively, ``expr`` would look like a positional path and
+    # clobber discovery. We peel the following token along with such flags so
+    # it never reaches our positional ``paths``. ``=``-joined forms
+    # (``-k=expr``, ``--tb=long``) are self-contained and need no lookahead.
+    OUR_FLAGS = {
+        "-j", "--jobs", "--paths", "--include-integration",
+        "--file-timeout", "--slice", "--generate-slices", "--files",
+    }
+    # pytest short flags that consume the NEXT token as their value.
+    PYTEST_VALUE_FLAGS = {"-k", "-m", "-p", "-o", "-c", "-r", "-W"}
+
+    def _is_our_flag(tok: str) -> bool:
+        # Match exact (``-j``, ``--paths``), ``=``-joined (``--paths=x``),
+        # and attached short-value (``-j4``) forms of our own options.
+        if tok in OUR_FLAGS:
+            return True
+        head = tok.split("=", 1)[0]
+        if head in OUR_FLAGS:
+            return True
+        # Attached short value, e.g. ``-j4`` → ``-j``.
+        if len(tok) > 2 and tok[:2] in OUR_FLAGS and not tok[1] == "-":
+            return True
+        return False
+
    argv = sys.argv[1:]
    if "--" in argv:
        sep = argv.index("--")
-        our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :]
+        before, explicit_passthrough = argv[:sep], argv[sep + 1 :]
    else:
-        our_args, pytest_passthrough = argv, []
+        before, explicit_passthrough = argv, []
+
+    our_args: List[str] = []
+    bare_passthrough: List[str] = []
+    i = 0
+    while i < len(before):
+        tok = before[i]
+        if tok.startswith("-") and not _is_our_flag(tok):
+            bare_passthrough.append(tok)
+            # Pull the value token for space-separated value flags.
+            if tok in PYTEST_VALUE_FLAGS and i + 1 < len(before):
+                bare_passthrough.append(before[i + 1])
+                i += 2
+                continue
+        else:
+            our_args.append(tok)
+        i += 1
+
    args = parser.parse_args(our_args)
+    # Bare flags run before any explicit ``--`` passthrough so ordering is
+    # intuitive (``run_tests.sh tests/foo.py -q -- --tb=long`` → ``-q --tb=long``).
+    pytest_passthrough = bare_passthrough + explicit_passthrough

    # Parse --slice (or HERMES_TEST_SLICE) early so we can exit on bad input
    # before doing any expensive discovery.
--- a/tests/test_run_tests_parallel.py
+++ b/tests/test_run_tests_parallel.py
@ -185,3 +185,95 @@ def test_grandchild_leak_is_killed_by_runner(tmp_path: Path) -> None:
            f"diag={diag!r} test_pid={test_pid} test_pgid={test_pgid}; "
            f"runner output:\n{proc.stdout}"
        )
+
+
+# ── Bare pytest-flag passthrough ─────────────────────────────────────────────
+#
+# The runner routes any token starting with ``-`` that isn't one of its own
+# options (``-j``/``--jobs``, ``--paths``, ``--slice``, ``--file-timeout``,
+# ``--generate-slices``, ``--files``, ``--include-integration``) straight
+# through to each per-file pytest invocation — no ``--`` separator required.
+# Before this, a bare ``-q`` errored out with "unrecognized arguments",
+# forcing a retry on every run. These tests are behavior contracts, not
+# snapshots: they assert that bare flags reach pytest and that value-taking
+# flags (``-k expr``) keep their value instead of having it stolen by the
+# positional-path discovery.
+
+
+def _make_probe_dir(tmp_path: Path) -> Path:
+    """Two trivial passing tests, one named test_alpha, one test_beta."""
+    probe_dir = tmp_path / "probe"
+    probe_dir.mkdir()
+    (probe_dir / "test_flagprobe.py").write_text(
+        "def test_alpha():\n    assert True\n\n"
+        "def test_beta():\n    assert True\n"
+    )
+    return probe_dir
+
+
+def _run_runner(probe_dir: Path, *extra: str) -> subprocess.CompletedProcess:
+    repo_root = Path(__file__).resolve().parent.parent
+    runner = repo_root / "scripts" / "run_tests_parallel.py"
+    return subprocess.run(
+        [sys.executable, str(runner), "--paths", str(probe_dir),
+         "-j", "1", "--file-timeout", "30", *extra],
+        cwd=repo_root,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        timeout=60,
+    )
+
+
+def test_bare_q_flag_passes_through(tmp_path: Path) -> None:
+    """A bare ``-q`` (no ``--``) runs clean instead of erroring out."""
+    probe_dir = _make_probe_dir(tmp_path)
+    proc = _run_runner(probe_dir, "-q")
+    assert proc.returncode == 0, proc.stdout
+    assert "unrecognized arguments" not in proc.stdout
+
+
+def test_bare_value_flag_keeps_its_value(tmp_path: Path) -> None:
+    """``-k test_alpha`` reaches pytest as a selector, not as a path.
+
+    The value token (``test_alpha``) must NOT be swallowed by the runner's
+    positional-path discovery — if it were, discovery would look for a path
+    named ``test_alpha``, find nothing, and the run would degrade. We assert
+    the run succeeds AND only one of the two tests was selected (proving the
+    ``-k`` filter actually applied inside pytest).
+    """
+    probe_dir = _make_probe_dir(tmp_path)
+    proc = _run_runner(probe_dir, "-k", "test_alpha")
+    assert proc.returncode == 0, proc.stdout
+    # Exactly one test selected: the per-file summary shows "1✓" (1 passed).
+    # test_beta is deselected by the -k filter.
+    assert "1✓" in proc.stdout or "1 passed" in proc.stdout, proc.stdout
+    assert "2✓" not in proc.stdout, (
+        f"both tests ran — -k filter did not apply:\n{proc.stdout}"
+    )
+
+
+def test_explicit_double_dash_still_works(tmp_path: Path) -> None:
+    """The legacy ``--`` separator keeps working alongside bare flags."""
+    probe_dir = _make_probe_dir(tmp_path)
+    proc = _run_runner(probe_dir, "-q", "--", "--tb=short")
+    assert proc.returncode == 0, proc.stdout
+    assert "unrecognized arguments" not in proc.stdout
+
+
+def test_positional_path_not_treated_as_flag(tmp_path: Path) -> None:
+    """A positional path arg still overrides discovery (not routed to pytest)."""
+    probe_dir = _make_probe_dir(tmp_path)
+    repo_root = Path(__file__).resolve().parent.parent
+    runner = repo_root / "scripts" / "run_tests_parallel.py"
+    # Pass the probe dir positionally (no --paths), plus a bare -q.
+    proc = subprocess.run(
+        [sys.executable, str(runner), str(probe_dir), "-j", "1",
+         "--file-timeout", "30", "-q"],
+        cwd=repo_root, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True, timeout=60,
+    )
+    assert proc.returncode == 0, proc.stdout
+    # Discovery found the probe file (2 tests), proving the positional path
+    # was consumed as a root, not forwarded to pytest as a bad flag.
+    assert "test_flagprobe.py" in proc.stdout, proc.stdout