Completes the #30719 restart-loop defenses.

Defenses 1-2 (the _HERMES_GATEWAY guard on `hermes gateway stop|restart` + terminal_tool, and the cron-creation lifecycle filter) already landed on main, but two gaps remained:

- The agent's `cronjob` model tool calls cron.jobs.create_job directly, bypassing the hermes_cli.cron.cron_create CLI filter, so lifecycle commands scheduled via the model tool were only blocked at execution time (terminal_tool), not at creation. Moved the filter to a shared cron/lifecycle_guard.py enforced at create_job — the single chokepoint every job-creation path hits (CLI + model tool). Re-exported _contains_gateway_lifecycle_command from hermes_cli.cron so terminal_tool's import keeps working.

- No breaker for the auto-resume loop itself. Defenses 1-2 cover the cron/CLI/terminal paths, but any other SIGTERM source (e.g. a raw terminal("launchctl kickstart ai.hermes.gateway")) still triggers the boot->auto-resume->re-run cycle. Added gateway/restart_loop_guard.py: counts restart-interrupted boots in a rolling window (config gateway.restart_loop_guard, default 3 boots / 60s) and skips auto-resume for that boot once tripped. The gateway still comes up and serves real inbound messages; it just stops replaying the session that keeps killing it, putting a human back in the loop.

Also tightened the lifecycle regex over main's version: dropped `hermes gateway start` (benign), required the gateway identifier on the launchctl/systemctl branches (so `launchctl unload ai.hermes.update-checker.plist` and `systemctl restart hermes-meta.service` no longer false-positive), added the inverse pkill token order, and fixed the binary-script bypass (decode with errors='replace' instead of swallowing UnicodeDecodeError). The create_job guard resolves relative script paths under HERMES_HOME/scripts the same way the scheduler does, so a bare script name is scanned as the file that actually runs.
Design and much of defense-2 originate from PR #33395 (@kshitijk4poor), which itself salvaged #30728 (@SimoKiihamaki). Rebuilt against current main since defenses 1-2 had already landed under different names.

Closes #30719.

Co-authored-by: SimoKiihamaki <simo.kiihamaki@gmail.com>
Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
533 lines
22 KiB
Python
533 lines
22 KiB
Python
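"""Tests for gateway restart-loop defenses (#30719).

Covers:
- Defense 1: gateway stop/restart refuse when _HERMES_GATEWAY=1
- Defense 2: cron create rejects prompts containing gateway lifecycle commands
- _contains_gateway_lifecycle_command pattern matching
- terminal_tool refusing gateway lifecycle commands when _HERMES_GATEWAY=1
- cron.lifecycle_guard.check_gateway_lifecycle and the cron.jobs.create_job
  chokepoint (including the `cronjob` model tool path)
- Defense 3: the gateway.restart_loop_guard auto-resume restart-loop breaker
"""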
|
|
|
|
import json
|
|
import os
|
|
from argparse import Namespace
|
|
|
|
import pytest
|
|
|
|
from hermes_cli.cron import (
|
|
_contains_gateway_lifecycle_command,
|
|
cron_command,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Defense 2: _contains_gateway_lifecycle_command pattern tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGatewayLifecyclePattern:
    """Verify the regex catches gateway lifecycle commands.

    Every case is passed to ``_contains_gateway_lifecycle_command``. The first
    three tests list inputs that must be detected; ``test_safe_commands``
    lists inputs that must be left alone.
    """

    # The whitespace and case variants must be distinct literals: if they
    # evaluate to the plain lowercase, single-spaced command they merely
    # duplicate the first case and exercise nothing new.
    @pytest.mark.parametrize("text", [
        "hermes gateway restart",
        "hermes gateway stop",
        "hermes  gateway  restart",  # double spaces
        "Hermes Gateway Restart",  # mixed case
        "HERMES GATEWAY RESTART",  # uppercase
    ])
    def test_hermes_gateway_commands(self, text):
        """``hermes gateway stop|restart`` is detected regardless of spacing/case."""
        assert _contains_gateway_lifecycle_command(text), f"Should match: {text!r}"

    @pytest.mark.parametrize("text", [
        "launchctl kickstart gui/501/ai.hermes.gateway",
        "launchctl unload ~/Library/LaunchAgents/ai.hermes.gateway.plist",
        "launchctl stop ai.hermes.gateway",
        "systemctl restart hermes-gateway",
        "systemctl stop hermes-gateway.service",
        "systemctl start hermes-gateway",
    ])
    def test_service_manager_commands(self, text):
        """launchctl/systemctl operations naming the gateway service are detected."""
        assert _contains_gateway_lifecycle_command(text), f"Should match: {text!r}"

    @pytest.mark.parametrize("text", [
        "kill hermes gateway process",
        "pkill -f hermes.*gateway",
        "pkill -f gateway.*hermes",  # inverse token order
    ])
    def test_kill_commands(self, text):
        """kill/pkill invocations targeting the gateway are detected."""
        assert _contains_gateway_lifecycle_command(text), f"Should match: {text!r}"

    @pytest.mark.parametrize("text", [
        "restart the server application",
        "hermes cron list",
        "hermes update",
        "hermes config set model claude",
        "echo 'just a normal cron job'",
        "run the backup script",
        "gateway is running fine",
        # `hermes gateway start` is benign — starting a gateway from inside a
        # gateway is a no-op / "already running", and a legit cron job may
        # start a sibling profile's gateway. Only restart/stop/kill are the
        # foot-gun (#30719 lists only those).
        "hermes gateway start",
        "hermes gateway start --all",
        # Tightened launchctl/systemctl branches: ops on NON-gateway hermes
        # services must not be falsely blocked (the old `.*hermes` matched any
        # hermes token).
        "launchctl unload ai.hermes.update-checker.plist",
        "launchctl restart ai.hermes.daemon",
        "systemctl restart hermes-meta.service",
        "systemctl restart hermes-cron-helper",
        # Regression (#30728 follow-up): legit prompts that merely mention an
        # unrelated gateway + a restart must NOT be blocked. The cron prompt is
        # fed to an LLM, not a shell, so substring detection on English text is
        # a high-FP no-op — only concrete command shapes trigger the block.
        "Summarize the API gateway logs and report any restart events from last night",
        "Check if the payment gateway needs a restart after the deploy",
        "Monitor the gateway and tell me if a restart is recommended",
        "research how the OpenAI API gateway handles restart after rate limiting",
        "compare AWS API Gateway vs Cloudflare on restart latency",
    ])
    def test_safe_commands(self, text):
        """Benign commands and plain-English prompts must not be flagged."""
        assert not _contains_gateway_lifecycle_command(text), f"Should NOT match: {text!r}"
|
|
|
|
|
|
class TestCronCreateLifecycleBlock:
    """Drive ``cron_command`` create requests through the lifecycle filter."""

    @pytest.fixture(autouse=True)
    def _setup_cron_dir(self, tmp_path, monkeypatch):
        """Point the cron storage paths at the per-test temporary directory."""
        cron_root = tmp_path / "cron"
        monkeypatch.setattr("cron.jobs.CRON_DIR", cron_root)
        monkeypatch.setattr("cron.jobs.JOBS_FILE", cron_root / "jobs.json")
        monkeypatch.setattr("cron.jobs.OUTPUT_DIR", cron_root / "output")

    @staticmethod
    def _create_args(**overrides):
        """Return a ``create`` Namespace; ``overrides`` replace the defaults below."""
        fields = dict(
            cron_command="create",
            schedule="30m",
            prompt=None,
            name=None,
            deliver=None,
            repeat=None,
            skill=None,
            skills=None,
            script=None,
            workdir=None,
            profile=None,
            no_agent=False,
        )
        fields.update(overrides)
        return Namespace(**fields)

    def test_block_hermes_gateway_restart(self, capsys):
        """A prompt embedding ``hermes gateway restart`` is rejected with the issue hint."""
        request = self._create_args(
            prompt="Upgrade hermes then run hermes gateway restart",
        )
        assert cron_command(request) == 1
        printed = capsys.readouterr().out
        assert "Blocked" in printed
        assert "#30719" in printed

    def test_block_launchctl_kickstart(self, capsys):
        """A prompt embedding a launchctl kickstart of the gateway is rejected."""
        request = self._create_args(
            schedule="0 9 * * *",
            prompt="Run launchctl kickstart -k gui/501/ai.hermes.gateway",
        )
        assert cron_command(request) == 1
        printed = capsys.readouterr().out
        assert "Blocked" in printed

    def test_block_script_with_lifecycle_command(self, tmp_path, capsys, monkeypatch):
        """A script-only (no_agent) job whose script restarts the gateway is rejected."""
        # This is the issue's real abuse path (restart_hermes_gateway_once.sh):
        # the script IS the job. It has to sit under HERMES_HOME/scripts so
        # that both the scheduler and the guard resolve the bare name to it.
        hermes_home = tmp_path / ".hermes"
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        scripts_dir = hermes_home / "scripts"
        scripts_dir.mkdir(parents=True)
        (scripts_dir / "restart.sh").write_text("#!/bin/bash\nhermes gateway restart\n")
        request = self._create_args(
            schedule="1h",
            script="restart.sh",
            no_agent=True,
        )
        assert cron_command(request) == 1
        printed = capsys.readouterr().out
        assert "Blocked" in printed

    def test_allow_safe_prompt(self, capsys):
        """A prompt with no lifecycle command creates the job successfully."""
        request = self._create_args(
            prompt="Check server health and report status",
        )
        assert cron_command(request) == 0
        printed = capsys.readouterr().out
        assert "Created job" in printed

    def test_allow_empty_prompt(self, capsys):
        """An empty prompt carries no lifecycle content, so the filter lets it by.

        The API still turns the request down for lacking prompt+skill, but
        that is a separate validation and not the lifecycle guard.
        """
        request = self._create_args()
        cron_command(request)
        # The return code is deliberately not checked: the request fails with
        # rc 1 for the "requires prompt or skill" reason, and the only thing
        # that matters here is that the failure is not a lifecycle "Blocked".
        printed = capsys.readouterr().out
        assert "Blocked" not in printed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Defense 1: gateway stop/restart refuse inside gateway
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGatewaySelfTargetingGuard:
    """``hermes gateway stop|restart`` must refuse to run when _HERMES_GATEWAY=1."""

    @staticmethod
    def _gateway_args(action):
        """Build the Namespace for a non-``--all``, non-``--system`` gateway action."""
        return Namespace(gateway_command=action, all=False, system=False)

    def _assert_refused(self, monkeypatch, action):
        """With the gateway marker set, ``action`` must exit with status 1."""
        monkeypatch.setenv("_HERMES_GATEWAY", "1")
        from hermes_cli.gateway import gateway_command

        request = self._gateway_args(action)
        with pytest.raises(SystemExit) as raised:
            gateway_command(request)
        assert raised.value.code == 1

    def _assert_guard_silent(self, monkeypatch, action):
        """With the marker unset, control must get past the guard for ``action``.

        Real signal delivery is never driven (that would trip the live-system
        guard). Instead both s6 dispatch helpers are swapped for a sentinel
        that raises, and seeing that exception proves the downstream path was
        reached.
        """
        monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
        import hermes_cli.gateway as gw

        class _Reached(Exception):
            pass

        def _tripwire(*positional, **named):
            raise _Reached()

        for helper_name in (
            "_dispatch_via_service_manager_if_s6",
            "_dispatch_all_via_service_manager_if_s6",
        ):
            monkeypatch.setattr(gw, helper_name, _tripwire)

        request = self._gateway_args(action)
        with pytest.raises(_Reached):
            gw.gateway_command(request)

    def test_stop_refuses_inside_gateway(self, monkeypatch):
        """``stop`` exits 1 when invoked from inside the gateway."""
        self._assert_refused(monkeypatch, "stop")

    def test_restart_refuses_inside_gateway(self, monkeypatch):
        """``restart`` exits 1 when invoked from inside the gateway."""
        self._assert_refused(monkeypatch, "restart")

    def test_stop_allows_outside_gateway(self, monkeypatch):
        """``stop`` proceeds to the real stop path when the marker is unset."""
        self._assert_guard_silent(monkeypatch, "stop")

    def test_restart_allows_outside_gateway(self, monkeypatch):
        """``restart`` proceeds to the s6 dispatch check when the marker is unset."""
        self._assert_guard_silent(monkeypatch, "restart")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Defense 1 (terminal_tool path): terminal_tool hard-blocks gateway lifecycle
# commands inside gateway
# ---------------------------------------------------------------------------
|
|
|
|
class TestTerminalToolGatewayLifecycleGuard:
    """terminal_tool has to reject gateway lifecycle commands when _HERMES_GATEWAY=1.

    Issue #37453: ``systemctl --user restart hermes-gateway`` runs as a child
    of the gateway process. Once systemd delivers SIGTERM, the gateway kills
    its own restart command mid-execution, so the service may never come back.
    The guard therefore has to fire before execution and unconditionally —
    ``force=True`` must not get around it.
    """

    # Commands that must be refused while running inside the gateway.
    _BLOCKED_COMMANDS = [
        "systemctl restart hermes-gateway",
        "systemctl --user restart hermes-gateway",
        "systemctl stop hermes-gateway.service",
        "hermes gateway restart",
        "launchctl kickstart gui/501/ai.hermes.gateway",
        "pkill -f hermes.*gateway",
    ]

    def _make_fake_env(self):
        """Return an environment whose ``execute`` fails the test if ever called."""
        class _FakeEnv:
            env = {}

            def execute(self, command, **kwargs):  # pragma: no cover
                raise AssertionError("execute must not be reached")

        return _FakeEnv()

    @staticmethod
    def _recording_env(log, output):
        """Return an environment that appends each command to ``log`` and succeeds."""
        class _FakeEnv:
            env = {}

            def execute(self, command, **kwargs):
                log.append(command)
                return {"output": output, "returncode": 0}

        return _FakeEnv()

    @staticmethod
    def _approve(cmd, env, **kwargs):
        """Stand-in for ``_check_all_guards`` that approves every command."""
        return {"approved": True}

    def _minimal_config(self):
        """Return the smallest env config dict these tests hand to terminal_tool."""
        return {"env_type": "local", "cwd": "/tmp", "timeout": 60, "lifetime_seconds": 3600}

    def _patch_env(self, monkeypatch, fake_env, *, inside_gateway: bool):
        """Install ``fake_env`` as the "default" environment and set/unset the marker."""
        import tools.terminal_tool as tt

        env_id = "default"
        replacements = (
            ("_active_environments", {env_id: fake_env}),
            ("_last_activity", {env_id: 0.0}),
            ("_task_env_overrides", {}),
            ("_get_env_config", self._minimal_config),
        )
        for attr_name, stand_in in replacements:
            monkeypatch.setattr(tt, attr_name, stand_in)

        if not inside_gateway:
            monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
            return
        monkeypatch.setenv("_HERMES_GATEWAY", "1")

    @pytest.mark.parametrize("cmd", _BLOCKED_COMMANDS)
    def test_blocks_lifecycle_commands_inside_gateway(self, monkeypatch, cmd):
        """Each lifecycle command returns exit_code 1 with a "Blocked" error."""
        import tools.terminal_tool as tt

        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)

        outcome = json.loads(tt.terminal_tool(command=cmd))

        assert outcome["exit_code"] == 1
        assert "Blocked" in outcome["error"]

    def test_force_true_cannot_bypass_block(self, monkeypatch):
        """Passing ``force=True`` still yields the blocked result."""
        import tools.terminal_tool as tt

        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)

        raw = tt.terminal_tool(command="systemctl restart hermes-gateway", force=True)
        outcome = json.loads(raw)

        assert outcome["exit_code"] == 1
        assert "Blocked" in outcome["error"]

    def test_safe_systemctl_commands_pass_through(self, monkeypatch):
        """Non-hermes systemctl commands must not be blocked by this guard."""
        import tools.terminal_tool as tt

        executed = []
        self._patch_env(
            monkeypatch,
            self._recording_env(executed, "Active: running"),
            inside_gateway=True,
        )
        monkeypatch.setattr(tt, "_check_all_guards", self._approve)

        outcome = json.loads(tt.terminal_tool(command="systemctl status nginx"))

        assert outcome["exit_code"] == 0
        assert executed == ["systemctl status nginx"]

    def test_guard_inactive_outside_gateway(self, monkeypatch):
        """Without _HERMES_GATEWAY=1 the lifecycle guard must not fire."""
        import tools.terminal_tool as tt

        executed = []
        self._patch_env(
            monkeypatch,
            self._recording_env(executed, "restarting..."),
            inside_gateway=False,
        )
        monkeypatch.setattr(tt, "_check_all_guards", self._approve)

        outcome = json.loads(tt.terminal_tool(command="systemctl restart hermes-gateway"))

        # Outside the gateway the lifecycle guard stays out of the way and the
        # ordinary approval flow decides (mocked here to approve).
        assert outcome["exit_code"] == 0
        assert executed == ["systemctl restart hermes-gateway"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# cron.lifecycle_guard module — the shared checker create_job/CLI/terminal use
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestLifecycleGuardModule:
    """Call ``cron.lifecycle_guard.check_gateway_lifecycle`` directly."""

    @staticmethod
    def _write_script(directory, filename, text):
        """Create ``directory/filename`` holding ``text`` and return its path."""
        target = directory / filename
        target.write_text(text)
        return target

    def test_prompt_with_command_raises(self):
        """A lifecycle command in the prompt raises, and the message cites #30719."""
        from cron.lifecycle_guard import GatewayLifecycleBlocked, check_gateway_lifecycle

        with pytest.raises(GatewayLifecycleBlocked) as raised:
            check_gateway_lifecycle("please run hermes gateway restart", None)
        assert "#30719" in str(raised.value)

    def test_clean_prompt_does_not_raise(self):
        """Prompts that only mention gateways or restarts in prose are accepted."""
        from cron.lifecycle_guard import check_gateway_lifecycle

        for harmless in (
            "research the gateway architecture",
            "check server health and restart watchers",
        ):
            check_gateway_lifecycle(harmless, None)

    def test_script_with_command_raises(self, tmp_path, monkeypatch):
        """A lifecycle command inside the script file raises even with a clean prompt."""
        from cron.lifecycle_guard import GatewayLifecycleBlocked, check_gateway_lifecycle

        script_path = self._write_script(
            tmp_path, "restart.sh", "#!/bin/bash\nhermes gateway restart\n"
        )
        with pytest.raises(GatewayLifecycleBlocked):
            check_gateway_lifecycle("clean prompt", str(script_path))

    def test_split_across_prompt_and_script_still_blocks(self, tmp_path):
        """Concatenated scan prevents splitting the command between prompt and
        script to slip through.

        NOTE(review): the script written here holds the complete command, so
        this case does not actually split it across prompt and script —
        confirm whether a genuinely split input was intended.
        """
        from cron.lifecycle_guard import GatewayLifecycleBlocked, check_gateway_lifecycle

        script_path = self._write_script(tmp_path, "ops.sh", "hermes gateway stop\n")
        with pytest.raises(GatewayLifecycleBlocked):
            check_gateway_lifecycle("daily ops job", str(script_path))

    def test_binary_script_does_not_silently_bypass(self, tmp_path):
        """Scripts with non-UTF-8 bytes are still scanned.

        Such bytes used to be swallowed via UnicodeDecodeError; decoding with
        errors='replace' means the scan always sees the command.
        """
        from cron.lifecycle_guard import GatewayLifecycleBlocked, check_gateway_lifecycle

        script_path = tmp_path / "weird.bin"
        script_path.write_bytes(b"\xfehermes gateway restart\xff")
        with pytest.raises(GatewayLifecycleBlocked):
            check_gateway_lifecycle("", str(script_path))

    def test_missing_script_does_not_raise(self, tmp_path):
        """A script path that does not exist is not treated as a violation."""
        from cron.lifecycle_guard import check_gateway_lifecycle

        absent = tmp_path / "nonexistent.sh"
        check_gateway_lifecycle("clean prompt", str(absent))

    def test_relative_script_resolved_under_scripts_dir(self, tmp_path, monkeypatch):
        """A bare/relative script name is looked up under HERMES_HOME/scripts.

        That is where the scheduler runs it from; without this the guard would
        read a nonexistent relative path and scan prompt-only content.
        """
        from cron.lifecycle_guard import GatewayLifecycleBlocked, check_gateway_lifecycle

        hermes_home = tmp_path / ".hermes"
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        scripts_dir = hermes_home / "scripts"
        scripts_dir.mkdir(parents=True)
        self._write_script(
            scripts_dir,
            "restart.sh",
            "launchctl kickstart -k gui/501/ai.hermes.gateway\n",
        )
        with pytest.raises(GatewayLifecycleBlocked):
            check_gateway_lifecycle("daily", "restart.sh")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Defense 2 (chokepoint): cron.jobs.create_job blocks the AGENT model-tool path
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCreateJobBlocksLifecycleCommands:
    """Guard enforcement at ``cron.jobs.create_job``.

    This is the regression a CLI-layer-only guard could not catch: the agent's
    `cronjob` model tool calls cron.jobs.create_job directly and so bypasses
    hermes_cli.cron.cron_create. Enforcing at create_job covers both paths.
    """

    @pytest.fixture(autouse=True)
    def _setup_cron_dir(self, tmp_path, monkeypatch):
        """Point the cron storage paths at the per-test temporary directory."""
        cron_root = tmp_path / "cron"
        monkeypatch.setattr("cron.jobs.CRON_DIR", cron_root)
        monkeypatch.setattr("cron.jobs.JOBS_FILE", cron_root / "jobs.json")
        monkeypatch.setattr("cron.jobs.OUTPUT_DIR", cron_root / "output")

    def test_create_job_blocks_prompt_command(self):
        """create_job raises when the prompt carries a lifecycle command."""
        from cron.jobs import create_job
        from cron.lifecycle_guard import GatewayLifecycleBlocked

        offending = "then run hermes gateway restart"
        with pytest.raises(GatewayLifecycleBlocked):
            create_job(prompt=offending, schedule="30m")

    def test_create_job_allows_benign_prompt(self):
        """A prose prompt mentioning gateways and restarts still yields a job id."""
        from cron.jobs import create_job

        created = create_job(
            prompt="summarize the API gateway logs and note restart events",
            schedule="30m",
        )
        assert created["id"]

    def test_cronjob_tool_surfaces_block_as_error(self, tmp_path, monkeypatch):
        """End-to-end through the model tool.

        The block must come back as result['error'] carrying the #30719 hint,
        not as an unhandled exception.
        """
        hermes_home = tmp_path / ".hermes"
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        hermes_home.mkdir(parents=True)
        from tools.cronjob_tools import cronjob

        raw = cronjob(
            action="create",
            schedule="0 9 * * *",
            prompt="please run hermes gateway restart nightly",
        )
        outcome = json.loads(raw)
        assert outcome.get("success") is False
        assert "#30719" in outcome.get("error", "")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Defense 3: auto-resume restart-loop breaker
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestRestartLoopGuard:
    """Exercise ``gateway.restart_loop_guard``.

    The guard trips after >= max_restarts restart-interrupted boots inside
    window_seconds, which breaks a SIGTERM-respawn loop that defenses 1-2
    don't cover.
    """

    @pytest.fixture(autouse=True)
    def _isolate_state(self, tmp_path, monkeypatch):
        """Give each test a fresh HERMES_HOME and clear the guard's state."""
        hermes_home = tmp_path / ".hermes"
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        hermes_home.mkdir(parents=True)
        import gateway.restart_loop_guard as rlg

        rlg.clear()

    def test_burst_trips_on_threshold(self):
        """Three boots within the 60s window trip the guard on the third."""
        import gateway.restart_loop_guard as rlg

        timeline = ((1000.0, False), (1005.0, False), (1010.0, True))
        for moment, tripped in timeline:
            assert rlg.check_and_record(3, 60, now=moment) is tripped

    def test_spread_boots_never_trip(self):
        """Boots spaced 70s apart never accumulate to the threshold."""
        import gateway.restart_loop_guard as rlg

        for moment in (1000.0, 1070.0, 1140.0):
            assert rlg.check_and_record(3, 60, now=moment) is False

    def test_disabled_when_max_restarts_zero(self):
        """A max_restarts of 0 never reports a trip."""
        import gateway.restart_loop_guard as rlg

        for offset in range(5):
            assert rlg.check_and_record(0, 60, now=1000.0 + offset) is False

    def test_is_tripped_reads_without_recording(self):
        """``is_restart_loop_tripped`` reports state without adding a boot."""
        import gateway.restart_loop_guard as rlg

        for moment in (1000.0, 1001.0):
            rlg.record_restart_interrupted_boot(60, now=moment)
        # Two boots recorded: querying must report "not tripped", and must not
        # itself count as the third boot.
        assert rlg.is_restart_loop_tripped(3, 60, now=1002.0) is False
        rlg.record_restart_interrupted_boot(60, now=1002.0)
        assert rlg.is_restart_loop_tripped(3, 60, now=1003.0) is True

    def test_clear_resets(self):
        """After ``clear()`` the next boot is counted from scratch."""
        import gateway.restart_loop_guard as rlg

        for moment in (1000.0, 1001.0):
            rlg.check_and_record(3, 60, now=moment)
        rlg.clear()
        assert rlg.check_and_record(3, 60, now=1002.0) is False
|