fix(tests): bare pytest flags pass through run_tests.sh without a '--' separator (#54008)

The parallel runner only forwarded pytest args after a literal '--', so a
bare 'scripts/run_tests.sh tests/foo.py -q' (or -v/-x/-k/--tb=long) errored
out with 'unrecognized arguments'. This contradicted the docstring's
promise that common pytest flags pass through, and forced a retry on every
run that used pytest muscle-memory.

Now any token starting with '-' that isn't one of the runner's own options
(-j/--jobs, --paths, --slice, --file-timeout, --generate-slices, --files,
--include-integration) is routed to each per-file pytest invocation
automatically. Value-taking flags given space-separated (-k expr, -m mark,
-p plugin, -o name=val, etc.) keep their value instead of having it stolen
by positional-path discovery. The explicit '--' separator still works and
stacks with bare flags.

- scripts/run_tests_parallel.py: argv splitter routes bare unknown flags to
  pytest; value-flag lookahead; updated docstring.
- scripts/run_tests.sh: usage comment reflects bare-flag passthrough.
- tests/test_run_tests_parallel.py: 4 behavior-contract tests (bare -q runs,
  -k keeps its value/filters, '--' still works, positional path stays a root).
This commit is contained in:
Teknium 2026-06-27 22:43:26 -07:00 committed by GitHub
parent c918d42d88
commit 2523917680
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 164 additions and 13 deletions

View file

@ -19,12 +19,17 @@
# scripts/run_tests.sh tests/agent/ # discover only here
# scripts/run_tests.sh tests/agent/ tests/acp/ # multiple roots
# scripts/run_tests.sh tests/foo.py # single file
# scripts/run_tests.sh tests/foo.py -- --tb=long # path + pytest args
# scripts/run_tests.sh -- -v --tb=long # pytest args only
# scripts/run_tests.sh tests/foo.py -q # path + bare pytest flag
# scripts/run_tests.sh tests/foo.py -v --tb=long # bare flags "just work"
# scripts/run_tests.sh -k 'pattern' # value flags pass through too
# scripts/run_tests.sh tests/foo.py -- --tb=long # explicit '--' still works
#
# Everything after a literal '--' is passed through to each per-file
# pytest invocation. Positional path arguments before '--' override
# the default discovery root (tests/).
# Bare pytest flags (anything starting with '-' that isn't one of this
# runner's own options: -j/--jobs, --paths, --slice, --file-timeout, etc.)
# are forwarded to each per-file pytest invocation automatically — no '--'
# separator required. The explicit '--' form still works and stacks with
# bare flags. Positional path arguments override the default discovery
# root (tests/).
set -euo pipefail

View file

@ -25,8 +25,10 @@ Why drop xdist entirely?
Usage:
python scripts/run_tests_parallel.py [pytest_args...]
Common pytest args pass through (e.g. ``-v``, ``-x``, ``--tb=long``,
``-k 'pattern'``, ``--lf``).
Common pytest args pass through to each per-file pytest invocation
(e.g. ``-q``, ``-v``, ``-x``, ``--tb=long``, ``-k 'pattern'``, ``--lf``)
with no special separator a bare ``-q`` "just works". Anything after
a literal ``--`` is also passed through, and stacks with bare flags.
Environment:
HERMES_TEST_WORKERS Override worker count (default: os.cpu_count())
@ -656,17 +658,69 @@ def main() -> int:
"separator is passed through to each per-file pytest invocation."
),
)
# Manually split argv on '--' so positional paths and pytest passthrough
# args don't fight over each other. argparse's nargs="*" positional is
# greedy and will swallow everything after '--' including the pytest
# flags, defeating the convention.
# Split argv into "our flags + positional paths" vs "pytest passthrough".
#
# Two ways to pass args through to the per-file pytest invocation:
# 1. Explicit ``--`` separator: everything after it goes to pytest.
# 2. Bare pytest flags anywhere before ``--``: any token starting with
# ``-`` that isn't one of OUR options is routed to pytest, so a bare
# ``-q`` / ``-v`` / ``-x`` / ``--tb=long`` / ``-k expr`` "just works"
# without the developer remembering the ``--``. This matches the
# docstring's promise and pytest muscle-memory.
#
# The subtlety bare-flag routing must handle: value-taking pytest flags
# given in space-separated form (``-k expr``, ``-m mark``, ``-p plugin``,
# ``-o name=val``). Naively, ``expr`` would look like a positional path and
# clobber discovery. We peel the following token along with such flags so
# it never reaches our positional ``paths``. ``=``-joined forms
# (``-k=expr``, ``--tb=long``) are self-contained and need no lookahead.
OUR_FLAGS = {
"-j", "--jobs", "--paths", "--include-integration",
"--file-timeout", "--slice", "--generate-slices", "--files",
}
# pytest short flags that consume the NEXT token as their value.
PYTEST_VALUE_FLAGS = {"-k", "-m", "-p", "-o", "-c", "-r", "-W"}
def _is_our_flag(tok: str) -> bool:
# Match exact (``-j``, ``--paths``), ``=``-joined (``--paths=x``),
# and attached short-value (``-j4``) forms of our own options.
if tok in OUR_FLAGS:
return True
head = tok.split("=", 1)[0]
if head in OUR_FLAGS:
return True
# Attached short value, e.g. ``-j4`` → ``-j``.
if len(tok) > 2 and tok[:2] in OUR_FLAGS and not tok[1] == "-":
return True
return False
argv = sys.argv[1:]
if "--" in argv:
sep = argv.index("--")
our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :]
before, explicit_passthrough = argv[:sep], argv[sep + 1 :]
else:
our_args, pytest_passthrough = argv, []
before, explicit_passthrough = argv, []
our_args: List[str] = []
bare_passthrough: List[str] = []
i = 0
while i < len(before):
tok = before[i]
if tok.startswith("-") and not _is_our_flag(tok):
bare_passthrough.append(tok)
# Pull the value token for space-separated value flags.
if tok in PYTEST_VALUE_FLAGS and i + 1 < len(before):
bare_passthrough.append(before[i + 1])
i += 2
continue
else:
our_args.append(tok)
i += 1
args = parser.parse_args(our_args)
# Bare flags run before any explicit ``--`` passthrough so ordering is
# intuitive (``run_tests.sh tests/foo.py -q -- --tb=long`` → ``-q --tb=long``).
pytest_passthrough = bare_passthrough + explicit_passthrough
# Parse --slice (or HERMES_TEST_SLICE) early so we can exit on bad input
# before doing any expensive discovery.

View file

@ -185,3 +185,95 @@ def test_grandchild_leak_is_killed_by_runner(tmp_path: Path) -> None:
f"diag={diag!r} test_pid={test_pid} test_pgid={test_pgid}; "
f"runner output:\n{proc.stdout}"
)
# ── Bare pytest-flag passthrough ─────────────────────────────────────────────
#
# The runner routes any token starting with ``-`` that isn't one of its own
# options (``-j``/``--jobs``, ``--paths``, ``--slice``, ``--file-timeout``,
# ``--generate-slices``, ``--files``, ``--include-integration``) straight
# through to each per-file pytest invocation — no ``--`` separator required.
# Before this, a bare ``-q`` errored out with "unrecognized arguments",
# forcing a retry on every run. These tests are behavior contracts, not
# snapshots: they assert that bare flags reach pytest and that value-taking
# flags (``-k expr``) keep their value instead of having it stolen by the
# positional-path discovery.
def _make_probe_dir(tmp_path: Path) -> Path:
"""Two trivial passing tests, one named test_alpha, one test_beta."""
probe_dir = tmp_path / "probe"
probe_dir.mkdir()
(probe_dir / "test_flagprobe.py").write_text(
"def test_alpha():\n assert True\n\n"
"def test_beta():\n assert True\n"
)
return probe_dir
def _run_runner(probe_dir: Path, *extra: str) -> subprocess.CompletedProcess:
repo_root = Path(__file__).resolve().parent.parent
runner = repo_root / "scripts" / "run_tests_parallel.py"
return subprocess.run(
[sys.executable, str(runner), "--paths", str(probe_dir),
"-j", "1", "--file-timeout", "30", *extra],
cwd=repo_root,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
timeout=60,
)
def test_bare_q_flag_passes_through(tmp_path: Path) -> None:
"""A bare ``-q`` (no ``--``) runs clean instead of erroring out."""
probe_dir = _make_probe_dir(tmp_path)
proc = _run_runner(probe_dir, "-q")
assert proc.returncode == 0, proc.stdout
assert "unrecognized arguments" not in proc.stdout
def test_bare_value_flag_keeps_its_value(tmp_path: Path) -> None:
"""``-k test_alpha`` reaches pytest as a selector, not as a path.
The value token (``test_alpha``) must NOT be swallowed by the runner's
positional-path discovery if it were, discovery would look for a path
named ``test_alpha``, find nothing, and the run would degrade. We assert
the run succeeds AND only one of the two tests was selected (proving the
``-k`` filter actually applied inside pytest).
"""
probe_dir = _make_probe_dir(tmp_path)
proc = _run_runner(probe_dir, "-k", "test_alpha")
assert proc.returncode == 0, proc.stdout
# Exactly one test selected: the per-file summary shows "1✓" (1 passed).
# test_beta is deselected by the -k filter.
assert "1✓" in proc.stdout or "1 passed" in proc.stdout, proc.stdout
assert "2✓" not in proc.stdout, (
f"both tests ran — -k filter did not apply:\n{proc.stdout}"
)
def test_explicit_double_dash_still_works(tmp_path: Path) -> None:
"""The legacy ``--`` separator keeps working alongside bare flags."""
probe_dir = _make_probe_dir(tmp_path)
proc = _run_runner(probe_dir, "-q", "--", "--tb=short")
assert proc.returncode == 0, proc.stdout
assert "unrecognized arguments" not in proc.stdout
def test_positional_path_not_treated_as_flag(tmp_path: Path) -> None:
"""A positional path arg still overrides discovery (not routed to pytest)."""
probe_dir = _make_probe_dir(tmp_path)
repo_root = Path(__file__).resolve().parent.parent
runner = repo_root / "scripts" / "run_tests_parallel.py"
# Pass the probe dir positionally (no --paths), plus a bare -q.
proc = subprocess.run(
[sys.executable, str(runner), str(probe_dir), "-j", "1",
"--file-timeout", "30", "-q"],
cwd=repo_root, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
text=True, timeout=60,
)
assert proc.returncode == 0, proc.stdout
# Discovery found the probe file (2 tests), proving the positional path
# was consumed as a root, not forwarded to pytest as a bad flag.
assert "test_flagprobe.py" in proc.stdout, proc.stdout