Source code for ralph.agents.parsers.claude_interactive_transcript_parser

"""Semantic parser for VT-normalized Claude interactive transcripts."""

from __future__ import annotations

import json
import re
from typing import cast

from ralph.display.vt_normalizer import normalize_vt_text

from .interactive_transcript_event import InteractiveTranscriptEvent

# Patterns whose first capture group is a session identifier found in a
# plain-text transcript line.
_SESSION_ID_PATTERNS = (
    # Labelled form, e.g. "session id: <id>" or "Session ID=<id>" (any case).
    re.compile(r"session\s+id\s*[:=]\s*([A-Za-z0-9._:-]+)", flags=re.IGNORECASE),
    # Resume hint, e.g. "--resume <id>" (case-sensitive).
    re.compile(r"--resume\s+([A-Za-z0-9._:-]+)"),
)
# Matches a line beginning with "claude tool:" (any case) that has at least one
# non-whitespace character after the colon.
_TOOL_USE_PATTERN = re.compile(r"^claude tool:\s*\S", flags=re.IGNORECASE)


def _extract_message_text(value: object) -> str:
    """Flatten a message ``content`` value into a single string.

    Args:
        value: Either a plain string, or a list whose dict entries may carry a
            string under the ``"text"`` key.

    Returns:
        ``value`` itself when it is a string; the concatenation (no separator)
        of every string ``"text"`` entry when it is a list; otherwise ``""``.
        List entries that are not dicts, or whose ``"text"`` is not a string,
        are skipped.
    """
    if isinstance(value, str):
        return value
    if not isinstance(value, list):
        return ""
    candidates = (entry.get("text") for entry in value if isinstance(entry, dict))
    return "".join(fragment for fragment in candidates if isinstance(fragment, str))


class ClaudeInteractiveTranscriptParser:
    """Extract semantic events from a normalized Claude interactive transcript.

    Each call to :meth:`feed` accepts one chunk of transcript text. A chunk
    that parses as a JSON object is interpreted structurally; anything else is
    VT-normalized and classified line by line. The parser remembers the most
    recently seen session id in ``session_id`` and suppresses an event when it
    is identical (same kind and text) to the one emitted immediately before
    it, including across ``feed`` calls.
    """

    def __init__(self) -> None:
        # Most recent session identifier seen in the transcript, if any.
        self.session_id: str | None = None
        # (kind, text) of the last event emitted; used to drop direct repeats.
        self._last_emitted_signature: tuple[str, str] | None = None

    def feed(self, raw_text: str) -> list[InteractiveTranscriptEvent]:
        """Parse one chunk of transcript text into events.

        Args:
            raw_text: Either a single JSON object or raw terminal text.

        Returns:
            The newly emitted events, in order. May be empty.
        """
        json_events = self._events_from_json(raw_text)
        if json_events is not None:
            return json_events

        events: list[InteractiveTranscriptEvent] = []
        for line in normalize_vt_text(raw_text).splitlines():
            text = line.strip()
            # Skip blank lines and lines made only of punctuation/box drawing.
            if not any(character.isalnum() for character in text):
                continue
            event = self._event_for_text(text)
            if event is not None:
                self._append_if_new(events, event)
        return events

    def _append_if_new(
        self, events: list[InteractiveTranscriptEvent], event: InteractiveTranscriptEvent
    ) -> None:
        """Append ``event`` to ``events`` unless it repeats the last emitted event."""
        signature = (event.kind, event.text)
        if signature == self._last_emitted_signature:
            return
        events.append(event)
        self._last_emitted_signature = signature

    def _events_from_assistant_content_item(
        self, item: dict[str, object]
    ) -> list[InteractiveTranscriptEvent]:
        """Convert one assistant content item into zero or one event.

        ``tool_use`` items become ``tool_use`` events, ``text`` items become
        ``output`` events and ``thinking`` items become ``thinking`` events.
        Only genuine string payloads are used: a ``null`` or otherwise
        non-string ``text``/``thinking`` value yields no event (rather than the
        literal text ``"None"``), and a missing, blank or non-string tool name
        falls back to ``"tool"``.
        """
        item_type = str(item.get("type", ""))
        if item_type == "tool_use":
            name = item.get("name")
            tool_name = name if isinstance(name, str) and name.strip() else "tool"
            return [InteractiveTranscriptEvent(kind="tool_use", text=f"claude tool: {tool_name}")]
        if item_type == "text":
            raw = item.get("text")
            text = raw.strip() if isinstance(raw, str) else ""
            if text:
                return [InteractiveTranscriptEvent(kind="output", text=text)]
        if item_type == "thinking":
            raw = item.get("thinking")
            text = raw.strip() if isinstance(raw, str) else ""
            if text:
                return [InteractiveTranscriptEvent(kind="thinking", text=text)]
        return []

    def _events_from_assistant_message(self, message: object) -> list[InteractiveTranscriptEvent]:
        """Collect events from every dict item of an assistant message's content list.

        Returns an empty list when ``message`` is not a dict or its
        ``"content"`` is not a list.
        """
        if not isinstance(message, dict):
            return []
        content = message.get("content")
        if not isinstance(content, list):
            return []
        events: list[InteractiveTranscriptEvent] = []
        for item in content:
            if isinstance(item, dict):
                events.extend(self._events_from_assistant_content_item(item))
        return events

    def _events_from_user_message(self, message: object) -> list[InteractiveTranscriptEvent]:
        """Collect ``tool_result`` events from a user message's content list.

        Only dict items whose ``"type"`` is ``"tool_result"`` and whose
        extracted text is non-empty produce an event. Returns an empty list
        when ``message`` is not a dict or its ``"content"`` is not a list.
        """
        if not isinstance(message, dict):
            return []
        content = message.get("content")
        if not isinstance(content, list):
            return []
        events: list[InteractiveTranscriptEvent] = []
        for item in content:
            if not isinstance(item, dict):
                continue
            item_dict = cast("dict[str, object]", item)
            if item_dict.get("type") != "tool_result":
                continue
            text = _extract_message_text(item_dict.get("content")).strip()
            if text:
                events.append(
                    InteractiveTranscriptEvent(kind="tool_result", text=f"claude result: {text}")
                )
        return events

    def _events_from_json(self, raw_text: str) -> list[InteractiveTranscriptEvent] | None:
        """Interpret ``raw_text`` as a single JSON transcript record.

        Returns:
            ``None`` when ``raw_text`` is not valid JSON or does not decode to
            an object, signalling the caller to fall back to text parsing.
            Otherwise the (possibly empty) list of events for the record: a
            ``session`` event when a non-empty string ``sessionId`` or
            ``session_id`` is present, followed by the events of an
            ``assistant`` or ``user`` message.
        """
        try:
            parsed = cast("object", json.loads(raw_text))
        except json.JSONDecodeError:
            return None
        if not isinstance(parsed, dict):
            return None
        obj = cast("dict[str, object]", parsed)
        event_type = str(obj.get("type", ""))
        events: list[InteractiveTranscriptEvent] = []

        session_id = obj.get("sessionId") or obj.get("session_id")
        if isinstance(session_id, str) and session_id:
            self.session_id = session_id
            self._append_if_new(events, InteractiveTranscriptEvent(kind="session", text=session_id))

        if event_type == "assistant":
            for event in self._events_from_assistant_message(obj.get("message")):
                self._append_if_new(events, event)
        elif event_type == "user":
            for event in self._events_from_user_message(obj.get("message")):
                self._append_if_new(events, event)
        return events

    def _event_for_text(self, text: str) -> InteractiveTranscriptEvent | None:
        """Classify one stripped, non-empty transcript line.

        Checks are applied in order: session id patterns (which also update
        ``session_id``), tool-use lines, ``claude result:`` lines, lifecycle
        prefixes, and finally plain output.
        """
        for pattern in _SESSION_ID_PATTERNS:
            match = pattern.search(text)
            if match is not None:
                self.session_id = match.group(1)
                return InteractiveTranscriptEvent(kind="session", text=text)
        if _TOOL_USE_PATTERN.match(text):
            return InteractiveTranscriptEvent(kind="tool_use", text=text)
        if text.startswith("claude result:"):
            return InteractiveTranscriptEvent(kind="tool_result", text=text)
        if text.startswith(("[claude]:", "claude ", "claude/")):
            return InteractiveTranscriptEvent(kind="lifecycle", text=text)
        return InteractiveTranscriptEvent(kind="output", text=text)