fix(gateway): release PID file + runtime lock in the force-exit backstop
Follow-up to #54111. That PR routed the early SystemExit exit paths (clean-fatal-config #51228, startup-aborted-before-running) through _exit_after_graceful_shutdown / os._exit. Those paths raise right after runner.start() without going through _stop_impl, so they relied on atexit to release the PID file + runtime lock — and os._exit bypasses atexit, leaking both. Release them explicitly in the backstop (the single guaranteed cleanup chokepoint). Both calls are idempotent: no-op on the normal _stop_impl path, actual cleanup on the early-exit paths. Corrects the now-inaccurate docstring claim that teardown always ran first. Adds a guard test plus the missing str-code->1 coverage. E2E: real PID file written + lock acquired, _exit_after_graceful_shutdown(78) exits code 78 AND removes the PID file (leak confirmed closed).
This commit is contained in:
parent
e23f723389
commit
df27267ed7
2 changed files with 64 additions and 7 deletions
|
|
@ -19326,7 +19326,7 @@ def main():
|
|||
|
||||
|
||||
def _exit_after_graceful_shutdown(exit_code: int) -> None:
|
||||
"""Flush stdio + logging, then hard-exit with ``exit_code``.
|
||||
"""Flush stdio, release the PID file + runtime lock, then hard-exit.
|
||||
|
||||
Graceful teardown is already complete by the time this runs, so there is
|
||||
nothing left that needs a clean interpreter shutdown. We deliberately use
|
||||
|
|
@ -19334,12 +19334,19 @@ def _exit_after_graceful_shutdown(exit_code: int) -> None:
|
|||
triggers ``Py_FinalizeEx`` → ``wait_for_thread_shutdown`` and joins every
|
||||
non-daemon thread — exactly the hang (#53107) a wedged tool-worker causes.
|
||||
|
||||
``os._exit`` also bypasses ``atexit`` handlers. That is safe here: the
|
||||
gateway's atexit-registered cleanup (``remove_pid_file``,
|
||||
``release_gateway_runtime_lock``) is also performed explicitly inside
|
||||
``start_gateway``'s teardown, so nothing is leaked. Any NEW atexit handler
|
||||
added to this process would be skipped — perform such cleanup in the
|
||||
teardown path, not via atexit.
|
||||
``os._exit`` bypasses ``atexit`` handlers, so we cannot rely on the
|
||||
``atexit``-registered ``remove_pid_file`` / ``release_gateway_runtime_lock``
|
||||
(registered in ``start_gateway``) to run. The full-shutdown path releases
|
||||
both explicitly in ``_stop_impl``, but the EARLY exit paths —
|
||||
clean-fatal-config (#51228) and startup-aborted-before-running — raise
|
||||
``SystemExit`` right after ``runner.start()`` without going through
|
||||
``_stop_impl``, so on those paths ``atexit`` was the only thing releasing
|
||||
them. Now that those paths are routed through this backstop (#53107),
|
||||
release both here explicitly. Both calls are idempotent —
|
||||
``remove_pid_file`` only unlinks a PID file that belongs to this process,
|
||||
and ``release_gateway_runtime_lock`` no-ops when the lock is already
|
||||
released — so this is a no-op on the normal shutdown path and the actual
|
||||
cleanup on the early-exit paths.
|
||||
|
||||
Logging is not flushed here: the gateway's handlers are synchronous
|
||||
``RotatingFileHandler``s that write each record immediately (no
|
||||
|
|
@ -19351,6 +19358,14 @@ def _exit_after_graceful_shutdown(exit_code: int) -> None:
|
|||
stream.flush()
|
||||
except Exception:
|
||||
pass
|
||||
# Guaranteed cleanup chokepoint: os._exit skips atexit, and the early
|
||||
# SystemExit exit paths never run _stop_impl, so release here (idempotent).
|
||||
try:
|
||||
from gateway.status import remove_pid_file, release_gateway_runtime_lock
|
||||
remove_pid_file()
|
||||
release_gateway_runtime_lock()
|
||||
except Exception:
|
||||
pass
|
||||
os._exit(exit_code)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -125,3 +125,45 @@ def test_main_systemexit_none_code_maps_to_zero(monkeypatch):
|
|||
gateway_run.main()
|
||||
|
||||
assert exc_info.value.code == 0
|
||||
|
||||
|
||||
def test_main_systemexit_str_code_maps_to_one(monkeypatch):
|
||||
"""SystemExit with a str code (CPython prints it to stderr then exits 1).
|
||||
We can't print during os._exit, but the code must still map to 1 — matching
|
||||
CPython's handle_system_exit semantics for a non-int, non-None code."""
|
||||
async def fake_start_gateway(config=None):
|
||||
raise SystemExit("fatal: something went wrong")
|
||||
|
||||
monkeypatch.setattr(gateway_run, "start_gateway", fake_start_gateway)
|
||||
monkeypatch.setattr(gateway_run.os, "_exit", _raise_exit)
|
||||
monkeypatch.setattr(gateway_run.sys, "argv", ["gateway.run"])
|
||||
monkeypatch.setattr(gateway_run.sys, "stdout", SimpleNamespace(flush=Mock()))
|
||||
monkeypatch.setattr(gateway_run.sys, "stderr", SimpleNamespace(flush=Mock()))
|
||||
|
||||
with pytest.raises(_ExitCalled) as exc_info:
|
||||
gateway_run.main()
|
||||
|
||||
assert exc_info.value.code == 1
|
||||
|
||||
|
||||
def test_exit_backstop_releases_pid_file_and_runtime_lock(monkeypatch):
|
||||
"""os._exit bypasses atexit, and the early SystemExit exit paths never run
|
||||
_stop_impl — so the force-exit backstop itself must release the PID file and
|
||||
runtime lock, or those early paths (#51228 fatal-config) would leak them.
|
||||
Both releases are idempotent, so this is safe on every exit path."""
|
||||
from gateway import status as gateway_status
|
||||
|
||||
remove_pid = Mock()
|
||||
release_lock = Mock()
|
||||
monkeypatch.setattr(gateway_status, "remove_pid_file", remove_pid)
|
||||
monkeypatch.setattr(gateway_status, "release_gateway_runtime_lock", release_lock)
|
||||
monkeypatch.setattr(gateway_run.os, "_exit", _raise_exit)
|
||||
monkeypatch.setattr(gateway_run.sys, "stdout", SimpleNamespace(flush=Mock()))
|
||||
monkeypatch.setattr(gateway_run.sys, "stderr", SimpleNamespace(flush=Mock()))
|
||||
|
||||
with pytest.raises(_ExitCalled) as exc_info:
|
||||
gateway_run._exit_after_graceful_shutdown(78)
|
||||
|
||||
assert exc_info.value.code == 78
|
||||
remove_pid.assert_called_once_with()
|
||||
release_lock.assert_called_once_with()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue