fix: make Nous Portal access token resolution resilient
- Track auth store source path on Nous state reads and write rotated OAuth refresh tokens back to the same store, preventing stale-token replays when Hermes falls back to a global/root auth.json. - Skip Nous fallback entries locally when no access/refresh token is present, suppressing repeated failed resolution attempts within a session. - Sync session model metadata after fallback switches so the gateway DB reflects the backend that actually served the latest turn.
This commit is contained in:
parent
cfbc7ed1f9
commit
6ed2f5d76f
4 changed files with 276 additions and 32 deletions
|
|
@ -1124,6 +1124,35 @@ def rewrite_prompt_model_identity(agent, model: str, provider: str) -> None:
|
|||
agent._cached_system_prompt = sp
|
||||
|
||||
|
||||
def _fallback_entry_key(fb: dict) -> tuple[str, str, str]:
|
||||
return (
|
||||
str(fb.get("provider") or "").strip().lower(),
|
||||
str(fb.get("model") or "").strip(),
|
||||
str(fb.get("base_url") or "").strip().rstrip("/"),
|
||||
)
|
||||
|
||||
|
||||
def _fallback_entry_unavailable_without_network(agent, fb: dict) -> Optional[str]:
|
||||
"""Return a skip reason for fallback entries known to be unusable locally."""
|
||||
fb_provider = (fb.get("provider") or "").strip().lower()
|
||||
if fb_provider != "nous":
|
||||
return None
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
|
||||
state = get_provider_auth_state("nous") or {}
|
||||
except Exception as exc:
|
||||
return f"nous_auth_unreadable:{type(exc).__name__}"
|
||||
access_value = state.get("access_token")
|
||||
refresh_value = state.get("refresh_token")
|
||||
has_access = isinstance(access_value, str) and bool(access_value.strip())
|
||||
has_refresh = isinstance(refresh_value, str) and bool(refresh_value.strip())
|
||||
if not (has_access or has_refresh):
|
||||
return "nous_token_missing"
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool:
|
||||
"""Switch to the next fallback model/provider in the chain.
|
||||
|
||||
|
|
@ -1164,10 +1193,29 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
return False
|
||||
fb = agent._fallback_chain[agent._fallback_index]
|
||||
agent._fallback_index += 1
|
||||
fb_key = _fallback_entry_key(fb)
|
||||
unavailable = getattr(agent, "_unavailable_fallback_keys", None)
|
||||
if unavailable is None:
|
||||
unavailable = set()
|
||||
agent._unavailable_fallback_keys = unavailable
|
||||
if fb_key in unavailable:
|
||||
logger.debug("Fallback skip: %s previously marked unavailable", fb_key)
|
||||
return agent._try_activate_fallback(reason)
|
||||
fb_provider = (fb.get("provider") or "").strip().lower()
|
||||
fb_model = (fb.get("model") or "").strip()
|
||||
if not fb_provider or not fb_model:
|
||||
return agent._try_activate_fallback() # skip invalid, try next
|
||||
return agent._try_activate_fallback(reason) # skip invalid, try next
|
||||
|
||||
local_skip_reason = _fallback_entry_unavailable_without_network(agent, fb)
|
||||
if local_skip_reason:
|
||||
unavailable.add(fb_key)
|
||||
logger.warning(
|
||||
"Fallback skip: %s/%s is not locally usable (%s); suppressing for this session",
|
||||
fb_provider,
|
||||
fb_model,
|
||||
local_skip_reason,
|
||||
)
|
||||
return agent._try_activate_fallback(reason)
|
||||
|
||||
# Skip entries that resolve to the current (provider, model) — falling
|
||||
# back to the same backend that just failed loops the failure. Compare
|
||||
|
|
@ -1182,7 +1230,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
"Fallback skip: chain entry %s/%s matches current provider/model",
|
||||
fb_provider, fb_model,
|
||||
)
|
||||
return agent._try_activate_fallback()
|
||||
return agent._try_activate_fallback(reason)
|
||||
if (
|
||||
fb_base_url_for_dedup
|
||||
and current_base_url
|
||||
|
|
@ -1193,7 +1241,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
"Fallback skip: chain entry base_url %s matches current backend",
|
||||
fb_base_url_for_dedup,
|
||||
)
|
||||
return agent._try_activate_fallback()
|
||||
return agent._try_activate_fallback(reason)
|
||||
|
||||
# Use centralized router for client construction.
|
||||
# raw_codex=True because the main agent needs direct responses.stream()
|
||||
|
|
@ -1224,7 +1272,8 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
logger.warning(
|
||||
"Fallback to %s failed: provider not configured",
|
||||
fb_provider)
|
||||
return agent._try_activate_fallback() # try next in chain
|
||||
unavailable.add(fb_key)
|
||||
return agent._try_activate_fallback(reason) # try next in chain
|
||||
try:
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||
|
||||
|
|
@ -1425,8 +1474,10 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
if fb_provider == "nous":
|
||||
unavailable.add(fb_key)
|
||||
logger.error("Failed to activate fallback %s: %s", fb_model, e)
|
||||
return agent._try_activate_fallback() # try next in chain
|
||||
return agent._try_activate_fallback(reason) # try next in chain
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue