# spongebob-vocab/__init__.py

"""
SpongeBob vocabulary transformer for BarnacleBoy.

Two modes:
1. Pattern-based vocabulary replacement (Wow! → Tartar sauce!)
2. Random opening interjections (Jumping jellyfish! Let's...)

Hook: transform_llm_output
"""

import logging
import os
import random
import re
from typing import Optional

# Module-level logger; call sites in this file prefix their messages with
# "[spongebob-vocab]".
logger = logging.getLogger(__name__)

# Opening interjections - BarnacleBoy's signature exclamations.
# transform_vocabulary() picks one with random.choice() and prepends it to a
# response; the probability is read from SPONGEBOB_INTERJECTION_CHANCE
# (default "0.25"). Entries from this list are also compared against the start
# of the response so that an existing interjection is not doubled up.
OPENING_INTERJECTIONS = [
    "Jumping jellyfish!",
    "Tartar sauce!",
    "Barnacles!",
    "Holy Krabby Patties!",
    "Great Neptune's nostrils!",
    "Mother of Pearl!",
    "Flappin' flounders!",
    "Holy fish paste!",
    "Holy shrimp!",
    "Great Barrier Reef!",
    "Aw, fishpaste!",
    "Holy cephalopod!",
    "What the barnacle!",
]

# Vocabulary replacements - regex patterns applied with re.sub, in list order.
# Format: (pattern, replacement)
# Matching is case-sensitive: no re.IGNORECASE flag is used. Most words spell
# out both cases with a character class ([Aa]mazing, ...); Wow, Great, Ugh,
# Well and Oh only match the capitalized form.
# [!,] matches both exclamation and comma contexts; the captured (\s) is put
# back via \1 so the original whitespace after the word is kept.
# \Z variants catch the word when it is the very last thing in the string
# (no trailing whitespace for the first form to capture).
# Most replacements end with "!"; a few comma forms keep the comma instead.
VOCABULARY_PATTERNS = [
    # Wow - surprise/excitement
    (r'\bWow[!,](\s)', r'Tartar sauce!\1'),      # "Wow! " or "Wow, " → "Tartar sauce! "
    (r'\bWow!\Z', 'Tartar sauce!'),            # Wow! at end of string
    (r'\bWow,', 'Tartar sauce,'),              # Fallback: "Wow," NOT followed by whitespace (rule 1 already took "Wow, ")

    # Amazing - praise/excitement
    (r'\b[Aa]mazing[!,](\s)', r'Holy Krabby Patties!\1'),
    (r'\b[Aa]mazing!\Z', 'Holy Krabby Patties!'),

    # Excellent - satisfaction
    (r'\b[Ee]xcellent[!,](\s)', r'Holy fish paste!\1'),
    (r'\b[Ee]xcellent!\Z', 'Holy fish paste!'),

    # Great - emphasis (only capitalized "Great", never "great")
    (r'\bGreat[!,](\s)', r'Barnacles!\1'),
    (r'\bGreat!\Z', 'Barnacles!'),

    # Perfect - satisfaction
    (r'\b[Pp]erfect[!,](\s)', r'Holy cephalopod!\1'),
    (r'\b[Pp]erfect!\Z', 'Holy cephalopod!'),

    # Fantastic - excitement/praise
    (r'\b[Ff]antastic[!,](\s)', r'Holy shrimp!\1'),
    (r'\b[Ff]antastic!\Z', 'Holy shrimp!'),

    # Brilliant - praise (British flavor, matches both cases)
    (r'\b[Bb]rilliant[!,](\s)', r'Great Barrier Reef!\1'),
    (r'\b[Bb]rilliant!\Z', 'Great Barrier Reef!'),

    # Damn - frustration/mild swearing (no whitespace/end-of-string requirement)
    (r'\b[Dd]amn!', 'Barnacles!'),              # Damn! → Barnacles!
    (r'\b[Dd]amn,', 'Aw, barnacles,'),          # Damn, → Aw, barnacles,

    # Ugh - frustration (capitalized only)
    (r'\bUgh!', 'Fish paste!'),                 # Ugh! → Fish paste!
    (r'\bUgh,', 'Aw, fish paste!'),             # Ugh, → Aw, fish paste! (comma becomes "!")

    # Common openers - conversational fillers (capitalized only, matched anywhere in the text)
    (r'\bWell,', "Flappin' flounders,"),         # Well, → Flappin' flounders,
    (r'\bOh,', 'Oh, barnacles,'),               # Oh, → Oh, barnacles,
]

# Patterns to preserve (never transform).
# Text matched by any of these is swapped for a placeholder before the
# vocabulary pass and restored afterwards. The patterns are heuristics and
# deliberately broad; none of them is anchored to a line or word start.
PRESERVE_PATTERNS = [
    r'```[\s\S]*?```',  # Fenced code blocks (non-greedy, may span lines)
    r'`[^`]+`',          # Inline code (one or more non-backtick chars between backticks)
    r'https?://[^\s]+',  # URLs, up to the next whitespace
    r'~/[^\s]+',        # File paths starting with ~/
    r'/[a-zA-Z][^\s]*', # Any "/" followed by a letter, up to whitespace (absolute paths, but also "/or" in "and/or")
    r'\$[^\n]+',        # From a "$" to the end of the line (shell commands, $VARS, also prices)
]


def _mask_preserved(text: str) -> tuple:
    """Swap every protected region of *text* for an opaque placeholder.

    All PRESERVE_PATTERNS are combined into one alternation and applied in a
    single left-to-right pass, so regions are identified by position and a
    longer region that contains a shorter one (a shell line containing inline
    code, a code block containing a path, ...) is masked as a whole.

    Returns:
        (masked_text, originals) where originals[i] is the text hidden behind
        the placeholder "\\x00PRESERVE{i}\\x00".
    """
    if not PRESERVE_PATTERNS:
        # An empty alternation would match the empty string everywhere.
        return text, []

    combined = re.compile("|".join(f"(?:{p})" for p in PRESERVE_PATTERNS))
    originals = []

    def _stash(match):
        originals.append(match.group(0))
        return f"\x00PRESERVE{len(originals) - 1}\x00"

    return combined.sub(_stash, text), originals


def _unmask_preserved(text: str, originals: list) -> str:
    """Put the regions hidden by _mask_preserved back into *text*."""

    def _lookup(match):
        index = int(match.group(1))
        # Leave placeholder-looking text we did not create untouched.
        return originals[index] if index < len(originals) else match.group(0)

    return re.sub(r"\x00PRESERVE(\d+)\x00", _lookup, text)


def transform_vocabulary(response_text: str, session_id: str = "", model: str = "", platform: str = "") -> Optional[str]:
    """
    Transform response text with SpongeBob vocabulary.

    Steps, when enabled:
      1. Mask code blocks, inline code, URLs, paths and shell lines
         (PRESERVE_PATTERNS) so they are never rewritten.
      2. Apply every (pattern, replacement) in VOCABULARY_PATTERNS in order.
      3. Restore the masked regions.
      4. With probability SPONGEBOB_INTERJECTION_CHANCE (default 0.25),
         prepend a random entry of OPENING_INTERJECTIONS unless the first
         line already starts with one of them.

    Environment:
        SPONGEBOB_MODE: the hook only acts when this is one of
            "on", "1", "true", "yes" (case-insensitive).
        SPONGEBOB_INTERJECTION_CHANCE: float compared against
            random.random(); a value that cannot be parsed falls back
            to 0.25 with a warning instead of raising.

    Args:
        response_text: The LLM's output text
        session_id: Session identifier (accepted for the hook signature; not read here)
        model: Model name (accepted for the hook signature; not read here)
        platform: Platform name, e.g. "discord" (accepted for the hook signature; not read here)

    Returns:
        Transformed text with SpongeBob vocabulary, or None if the mode is
        disabled or the response is empty/whitespace.
        Non-None non-empty string takes precedence over other hooks.
    """
    # Check if SpongeBob mode is enabled
    mode = os.environ.get("SPONGEBOB_MODE", "").strip().lower()
    if mode not in ("on", "1", "true", "yes"):
        return None  # Not enabled, let other hooks or default pass through

    if not response_text or not response_text.strip():
        return None  # Empty response, nothing to transform

    logger.info("[spongebob-vocab] Hook fired, transforming output")

    # Mask protected regions, rewrite the rest, then restore.
    masked, originals = _mask_preserved(response_text)
    for pattern, replacement in VOCABULARY_PATTERNS:
        masked = re.sub(pattern, replacement, masked)
    transformed = _unmask_preserved(masked, originals)

    # Add random opening interjection (25% chance by default)
    # Configurable via SPONGEBOB_INTERJECTION_CHANCE env var (0.0-1.0)
    raw_chance = os.environ.get("SPONGEBOB_INTERJECTION_CHANCE", "0.25")
    try:
        interjection_chance = float(raw_chance)
    except ValueError:
        # A typo in the environment must not take the whole hook down.
        logger.warning(
            "[spongebob-vocab] Invalid SPONGEBOB_INTERJECTION_CHANCE=%r, using 0.25",
            raw_chance,
        )
        interjection_chance = 0.25

    if OPENING_INTERJECTIONS and random.random() < interjection_chance:
        # Avoid double-interjecting: the LLM, or the vocabulary pass above
        # ("Perfect!" -> "Holy cephalopod!"), may already have opened with one.
        # Compare without the trailing "!" so "Barnacles," also counts.
        first_line = transformed.split('\n', 1)[0].strip()
        known_openers = tuple(exc.rstrip('!') for exc in OPENING_INTERJECTIONS)

        if not first_line.startswith(known_openers):
            interjection = random.choice(OPENING_INTERJECTIONS)
            # Prepend interjection with space separator
            transformed = f"{interjection} {transformed}"
            logger.info("[spongebob-vocab] Prepended interjection: %s", interjection)

    return transformed


def register(ctx):
    """Plugin entry point: attach transform_vocabulary to the host's hooks.

    Args:
        ctx: Registration context supplied by the host (Hermes); it must
            provide ``register_hook(hook_name, callback)``.
    """
    hook_name = "transform_llm_output"
    callback = transform_vocabulary
    ctx.register_hook(hook_name, callback)
    logger.info("[spongebob-vocab] Registered transform_llm_output hook")


# Public API of the package: the hook callable and the registration entry point.
__all__ = ["transform_vocabulary", "register"]