diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 20deae4c9..a54b7163d 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -19,12 +19,17 @@ # scripts/run_tests.sh tests/agent/ # discover only here # scripts/run_tests.sh tests/agent/ tests/acp/ # multiple roots # scripts/run_tests.sh tests/foo.py # single file -# scripts/run_tests.sh tests/foo.py -- --tb=long # path + pytest args -# scripts/run_tests.sh -- -v --tb=long # pytest args only +# scripts/run_tests.sh tests/foo.py -q # path + bare pytest flag +# scripts/run_tests.sh tests/foo.py -v --tb=long # bare flags "just work" +# scripts/run_tests.sh -k 'pattern' # value flags pass through too +# scripts/run_tests.sh tests/foo.py -- --tb=long # explicit '--' still works # -# Everything after a literal '--' is passed through to each per-file -# pytest invocation. Positional path arguments before '--' override -# the default discovery root (tests/). +# Bare pytest flags (anything starting with '-' that isn't one of this +# runner's own options: -j/--jobs, --paths, --slice, --file-timeout, etc.) +# are forwarded to each per-file pytest invocation automatically — no '--' +# separator required. The explicit '--' form still works and stacks with +# bare flags. Positional path arguments override the default discovery +# root (tests/). set -euo pipefail diff --git a/scripts/run_tests_parallel.py b/scripts/run_tests_parallel.py index 0128bcfbe..edbd1669b 100755 --- a/scripts/run_tests_parallel.py +++ b/scripts/run_tests_parallel.py @@ -25,8 +25,10 @@ Why drop xdist entirely? Usage: python scripts/run_tests_parallel.py [pytest_args...] - Common pytest args pass through (e.g. ``-v``, ``-x``, ``--tb=long``, - ``-k 'pattern'``, ``--lf``). + Common pytest args pass through to each per-file pytest invocation + (e.g. ``-q``, ``-v``, ``-x``, ``--tb=long``, ``-k 'pattern'``, ``--lf``) + with no special separator — a bare ``-q`` "just works". Anything after + a literal ``--`` is also passed through, and stacks with bare flags. Environment: HERMES_TEST_WORKERS Override worker count (default: os.cpu_count()) @@ -656,17 +658,69 @@ def main() -> int: "separator is passed through to each per-file pytest invocation." ), ) - # Manually split argv on '--' so positional paths and pytest passthrough - # args don't fight over each other. argparse's nargs="*" positional is - # greedy and will swallow everything after '--' including the pytest - # flags, defeating the convention. + # Split argv into "our flags + positional paths" vs "pytest passthrough". + # + # Two ways to pass args through to the per-file pytest invocation: + # 1. Explicit ``--`` separator: everything after it goes to pytest. + # 2. Bare pytest flags anywhere before ``--``: any token starting with + # ``-`` that isn't one of OUR options is routed to pytest, so a bare + # ``-q`` / ``-v`` / ``-x`` / ``--tb=long`` / ``-k expr`` "just works" + # without the developer remembering the ``--``. This matches the + # docstring's promise and pytest muscle-memory. + # + # The subtlety bare-flag routing must handle: value-taking pytest flags + # given in space-separated form (``-k expr``, ``-m mark``, ``-p plugin``, + # ``-o name=val``). Naively, ``expr`` would look like a positional path and + # clobber discovery. We peel the following token along with such flags so + # it never reaches our positional ``paths``. ``=``-joined forms + # (``-k=expr``, ``--tb=long``) are self-contained and need no lookahead. + OUR_FLAGS = { + "-j", "--jobs", "--paths", "--include-integration", + "--file-timeout", "--slice", "--generate-slices", "--files", + } + # pytest short flags that consume the NEXT token as their value. + PYTEST_VALUE_FLAGS = {"-k", "-m", "-p", "-o", "-c", "-r", "-W"} + + def _is_our_flag(tok: str) -> bool: + # Match exact (``-j``, ``--paths``), ``=``-joined (``--paths=x``), + # and attached short-value (``-j4``) forms of our own options. + if tok in OUR_FLAGS: + return True + head = tok.split("=", 1)[0] + if head in OUR_FLAGS: + return True + # Attached short value, e.g. ``-j4`` → ``-j``. + if len(tok) > 2 and tok[:2] in OUR_FLAGS and not tok[1] == "-": + return True + return False + argv = sys.argv[1:] if "--" in argv: sep = argv.index("--") - our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :] + before, explicit_passthrough = argv[:sep], argv[sep + 1 :] else: - our_args, pytest_passthrough = argv, [] + before, explicit_passthrough = argv, [] + + our_args: List[str] = [] + bare_passthrough: List[str] = [] + i = 0 + while i < len(before): + tok = before[i] + if tok.startswith("-") and not _is_our_flag(tok): + bare_passthrough.append(tok) + # Pull the value token for space-separated value flags. + if tok in PYTEST_VALUE_FLAGS and i + 1 < len(before): + bare_passthrough.append(before[i + 1]) + i += 2 + continue + else: + our_args.append(tok) + i += 1 + args = parser.parse_args(our_args) + # Bare flags run before any explicit ``--`` passthrough so ordering is + # intuitive (``run_tests.sh tests/foo.py -q -- --tb=long`` → ``-q --tb=long``). + pytest_passthrough = bare_passthrough + explicit_passthrough # Parse --slice (or HERMES_TEST_SLICE) early so we can exit on bad input # before doing any expensive discovery. diff --git a/tests/test_run_tests_parallel.py b/tests/test_run_tests_parallel.py index 743ba7921..3cba46fab 100644 --- a/tests/test_run_tests_parallel.py +++ b/tests/test_run_tests_parallel.py @@ -185,3 +185,95 @@ def test_grandchild_leak_is_killed_by_runner(tmp_path: Path) -> None: f"diag={diag!r} test_pid={test_pid} test_pgid={test_pgid}; " f"runner output:\n{proc.stdout}" ) + + +# ── Bare pytest-flag passthrough ───────────────────────────────────────────── +# +# The runner routes any token starting with ``-`` that isn't one of its own +# options (``-j``/``--jobs``, ``--paths``, ``--slice``, ``--file-timeout``, +# ``--generate-slices``, ``--files``, ``--include-integration``) straight +# through to each per-file pytest invocation — no ``--`` separator required. +# Before this, a bare ``-q`` errored out with "unrecognized arguments", +# forcing a retry on every run. These tests are behavior contracts, not +# snapshots: they assert that bare flags reach pytest and that value-taking +# flags (``-k expr``) keep their value instead of having it stolen by the +# positional-path discovery. + + +def _make_probe_dir(tmp_path: Path) -> Path: + """Two trivial passing tests, one named test_alpha, one test_beta.""" + probe_dir = tmp_path / "probe" + probe_dir.mkdir() + (probe_dir / "test_flagprobe.py").write_text( + "def test_alpha():\n assert True\n\n" + "def test_beta():\n assert True\n" + ) + return probe_dir + + +def _run_runner(probe_dir: Path, *extra: str) -> subprocess.CompletedProcess: + repo_root = Path(__file__).resolve().parent.parent + runner = repo_root / "scripts" / "run_tests_parallel.py" + return subprocess.run( + [sys.executable, str(runner), "--paths", str(probe_dir), + "-j", "1", "--file-timeout", "30", *extra], + cwd=repo_root, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + timeout=60, + ) + + +def test_bare_q_flag_passes_through(tmp_path: Path) -> None: + """A bare ``-q`` (no ``--``) runs clean instead of erroring out.""" + probe_dir = _make_probe_dir(tmp_path) + proc = _run_runner(probe_dir, "-q") + assert proc.returncode == 0, proc.stdout + assert "unrecognized arguments" not in proc.stdout + + +def test_bare_value_flag_keeps_its_value(tmp_path: Path) -> None: + """``-k test_alpha`` reaches pytest as a selector, not as a path. + + The value token (``test_alpha``) must NOT be swallowed by the runner's + positional-path discovery — if it were, discovery would look for a path + named ``test_alpha``, find nothing, and the run would degrade. We assert + the run succeeds AND only one of the two tests was selected (proving the + ``-k`` filter actually applied inside pytest). + """ + probe_dir = _make_probe_dir(tmp_path) + proc = _run_runner(probe_dir, "-k", "test_alpha") + assert proc.returncode == 0, proc.stdout + # Exactly one test selected: the per-file summary shows "1✓" (1 passed). + # test_beta is deselected by the -k filter. + assert "1✓" in proc.stdout or "1 passed" in proc.stdout, proc.stdout + assert "2✓" not in proc.stdout, ( + f"both tests ran — -k filter did not apply:\n{proc.stdout}" + ) + + +def test_explicit_double_dash_still_works(tmp_path: Path) -> None: + """The legacy ``--`` separator keeps working alongside bare flags.""" + probe_dir = _make_probe_dir(tmp_path) + proc = _run_runner(probe_dir, "-q", "--", "--tb=short") + assert proc.returncode == 0, proc.stdout + assert "unrecognized arguments" not in proc.stdout + + +def test_positional_path_not_treated_as_flag(tmp_path: Path) -> None: + """A positional path arg still overrides discovery (not routed to pytest).""" + probe_dir = _make_probe_dir(tmp_path) + repo_root = Path(__file__).resolve().parent.parent + runner = repo_root / "scripts" / "run_tests_parallel.py" + # Pass the probe dir positionally (no --paths), plus a bare -q. + proc = subprocess.run( + [sys.executable, str(runner), str(probe_dir), "-j", "1", + "--file-timeout", "30", "-q"], + cwd=repo_root, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, timeout=60, + ) + assert proc.returncode == 0, proc.stdout + # Discovery found the probe file (2 tests), proving the positional path + # was consumed as a root, not forwarded to pytest as a bad flag. + assert "test_flagprobe.py" in proc.stdout, proc.stdout