# Source code for prompt_risk.uc.uc1.p1_extraction_runner

# -*- coding: utf-8 -*-

"""
UC1-P1 extraction runner — execute the FNOL extraction prompt and parse output.
"""

import typing as T
import json
import re
from pydantic import BaseModel, Field, ValidationError, field_validator

from ...constants import PromptIdEnum
from ...prompts import Prompt
from ...llm_output import extract_json
from ...bedrock_utils import converse

if T.TYPE_CHECKING:
    from mypy_boto3_bedrock_runtime import BedrockRuntimeClient

# Allowed values for ``P1ExtractionOutput.injury_indicator``.
T_INJURY_INDICATOR = T.Literal["none", "minor", "moderate", "severe", "fatal"]
# Allowed values for ``P1ExtractionOutput.estimated_severity``.
T_ESTIMATE_SEVERITY = T.Literal["low", "medium", "high"]



# [docs]
class P1ExtractionUserPromptData(BaseModel):
    """Variables supplied to the P1 extraction user prompt template.

    :func:`run_p1_extraction` renders the user prompt template with an
    instance of this model bound to the template variable ``data``.
    """

    # NOTE(review): presumably identifies where the report came from
    # (e.g. the intake channel) — confirm against the prompt template.
    source: str
    # Free-text FNOL narrative for the claim; differs on every request.
    narrative: str




# [docs]
class P1ExtractionOutput(BaseModel):
    """Structured output for the P1 FNOL extraction prompt.

    Each field mirrors the JSON schema specified in the system prompt.
    ``injury_indicator`` and ``estimated_severity`` are restricted to their
    ``Literal`` enumerations and ``date_of_loss`` is checked by a dedicated
    validator; the remaining fields are only type-checked.  When validation
    fails, the retry loop in :func:`run_p1_extraction` feeds the error back
    to the model so it can self-correct — see :func:`run_p1_extraction` for
    details.
    """

    # fmt: off
    date_of_loss: str = Field(description="Date of the incident (YYYY-MM-DD or 'unknown')")
    time_of_loss: str = Field(description="Time of the incident (HH:MM 24-hour or 'unknown')")
    location: str = Field(description="Where the incident occurred")
    line_of_business_hint: str = Field(description="One of auto, property, workers_comp, general_liability, or ambiguous")
    parties_involved: list[str] = Field(description="List of party roles")
    damage_description: str = Field(description="Brief summary of damage")
    injury_indicator: T_INJURY_INDICATOR = Field(description="none, minor, moderate, severe, or fatal")
    police_report: str = Field(description="Report number if mentioned, otherwise 'none'")
    evidence_available: list[str] = Field(description="List of available evidence types")
    estimated_severity: T_ESTIMATE_SEVERITY = Field(description="low, medium, or high")
    # fmt: on

    @field_validator("date_of_loss")
    @classmethod
    def validate_date_of_loss(cls, v: str) -> str:
        """Validate ``date_of_loss`` and return it in canonical form.

        The literal ``'unknown'`` is passed through unchanged.  Any other
        value must parse with the ``%Y-%m-%d`` format and is returned as a
        zero-padded ISO date string.

        :raises ValueError: if ``v`` is neither ``'unknown'`` nor a
            parseable date; pydantic reports this as a validation error.
        """
        if v == "unknown":
            return v
        from datetime import datetime

        try:
            parsed = datetime.strptime(v, "%Y-%m-%d")
        except ValueError:
            # The message below already says everything the model needs to
            # self-correct, so the strptime error is not chained.
            raise ValueError(
                f"date_of_loss must be 'YYYY-MM-DD' or 'unknown', got '{v}'"
            ) from None
        # strptime tolerates non-padded components such as '2024-1-5';
        # re-serialising guarantees the stored value really is YYYY-MM-DD.
        return parsed.date().isoformat()



MAX_RETRIES = 3
"""Maximum number of converse API calls per :func:`run_p1_extraction` invocation.

Despite the name, this counts total attempts (the first call included), not
additional retries after the first call.

LLM output is non-deterministic — even with a well-crafted prompt, the model
may occasionally return values that violate the output schema (e.g. a date in
``MM/DD/YYYY`` instead of ``YYYY-MM-DD``, or a severity string outside the
allowed enum).  Rather than failing immediately, we feed the Pydantic
validation error back to the model as a follow-up user message so it can
self-correct.  Three attempts strike a balance between resilience and cost:
most fixable errors resolve on the second try, and a third guards against
edge cases without runaway API spend.
"""



# [docs]
def run_p1_extraction(
    client: "BedrockRuntimeClient",
    data: P1ExtractionUserPromptData,
    prompt_version: str = "01",
    model_id: str = "us.amazon.nova-2-lite-v1:0",
) -> P1ExtractionOutput:
    """Execute the P1 extraction prompt and return validated output.

    **System prompt caching** — The system prompt is static (no Jinja
    variables) by design.  This lets us place a ``cachePoint`` after it so
    that Bedrock caches the prefix across calls.  When the same system
    prompt is reused — whether across retries within a single
    ``run_p1_extraction()`` call or across independent invocations —
    subsequent requests hit the cache and skip redundant input processing,
    reducing both latency and cost.

    **Why the user prompt is NOT cached** — The user prompt contains the
    per-request FNOL narrative and is different for every claim.  Caching
    it would incur a cache-write cost on every call with virtually zero
    chance of a cache hit, making it a net loss.  During retries the user
    prompt is already present in the ``messages`` history, so the model
    sees it without any extra caching mechanism.

    **Retry on parse or validation failure** — LLM output is
    non-deterministic.  When JSON extraction or Pydantic validation fails
    (e.g. malformed JSON, wrong date format, invalid enum value), we append
    the model's raw reply as an ``assistant`` message and the error as a
    ``user`` message, then call the API again.  This gives the model
    concrete feedback on what went wrong so it can self-correct.  We allow
    up to ``MAX_RETRIES`` attempts; if all fail, the last exception is
    re-raised.

    :param client: Bedrock runtime client passed through to ``converse``.
    :param data: Variables rendered into the user prompt template.
    :param prompt_version: Version of the UC1-P1 extraction prompt to load.
    :param model_id: Bedrock model identifier passed through to ``converse``.
    :returns: The first model reply that parses and validates.
    :raises pydantic.ValidationError: if the final attempt fails schema
        validation.
    :raises json.JSONDecodeError: if the final attempt's JSON extraction
        raises it.
    """
    prompt = Prompt(id=PromptIdEnum.UC1_P1_EXTRACTION.value, version=prompt_version)

    # System prompt is static — attach a cachePoint so Bedrock can reuse
    # the cached prefix across calls (retries and independent invocations).
    system = [
        {"text": prompt.system_prompt_template.render()},
        {"cachePoint": {"type": "default"}},
    ]
    user_prompt = prompt.user_prompt_template.render(data=data)
    messages: list[dict] = [
        {"role": "user", "content": [{"text": user_prompt}]},
    ]

    for attempt in range(MAX_RETRIES):
        text = converse(client, model_id, system, messages)

        try:
            # JSON extraction lives inside the ``try`` so that a reply with
            # malformed JSON is fed back and retried, like a schema violation.
            json_obj = extract_json(text)
            # ``model_validate`` reports a non-mapping payload (e.g. a list)
            # as a ValidationError, whereas ``**json_obj`` would raise an
            # uncaught TypeError.
            return P1ExtractionOutput.model_validate(json_obj)
        except (json.JSONDecodeError, ValidationError) as exc:
            if attempt == MAX_RETRIES - 1:
                raise

            # Feed the validation error back so the model can self-correct.
            error_msg = (
                f"Your previous response failed validation:\n{exc}\n\n"
                "Please return a corrected JSON object."
            )
            messages.append({"role": "assistant", "content": [{"text": text}]})
            messages.append({"role": "user", "content": [{"text": error_msg}]})

    raise Exception("Should never reach this line of code")  # pragma: no cover