Source code for prompt_risk.uc.uc1.p1_extraction_runner
# -*- coding: utf-8 -*-
"""
UC1-P1 extraction runner — execute the FNOL extraction prompt and parse output.
"""
import typing as T
import json
import re
from pydantic import BaseModel, Field, ValidationError, field_validator
from ...constants import PromptIdEnum
from ...prompts import Prompt
from ...llm_output import extract_json
from ...bedrock_utils import converse
if T.TYPE_CHECKING:
from mypy_boto3_bedrock_runtime import BedrockRuntimeClient
# Closed set of values accepted for ``P1ExtractionOutput.injury_indicator``.
T_INJURY_INDICATOR = T.Literal["none", "minor", "moderate", "severe", "fatal"]
# Closed set of values accepted for ``P1ExtractionOutput.estimated_severity``.
T_ESTIMATE_SEVERITY = T.Literal["low", "medium", "high"]
[docs]
class P1ExtractionOutput(BaseModel):
    """Structured output for the P1 FNOL extraction prompt.

    Each field mirrors the JSON schema specified in the system prompt.

    What is actually enforced at validation time:

    - ``date_of_loss`` must be exactly ``YYYY-MM-DD`` (a real calendar date)
      or the literal string ``'unknown'``.
    - ``injury_indicator`` and ``estimated_severity`` are restricted to their
      ``Literal`` value sets.

    The remaining string fields (including ``time_of_loss`` and
    ``line_of_business_hint``) are only type-checked; the formats named in
    their descriptions are guidance for the model, not validated here.

    When validation fails, the retry loop in :func:`run_p1_extraction` feeds
    the error back to the model so it can self-correct.
    """

    # fmt: off
    date_of_loss: str = Field(description="Date of the incident (YYYY-MM-DD or 'unknown')")
    time_of_loss: str = Field(description="Time of the incident (HH:MM 24-hour or 'unknown')")
    location: str = Field(description="Where the incident occurred")
    line_of_business_hint: str = Field(description="One of auto, property, workers_comp, general_liability, or ambiguous")
    parties_involved: list[str] = Field(description="List of party roles")
    damage_description: str = Field(description="Brief summary of damage")
    injury_indicator: T_INJURY_INDICATOR = Field(description="none, minor, moderate, severe, or fatal")
    police_report: str = Field(description="Report number if mentioned, otherwise 'none'")
    evidence_available: list[str] = Field(description="List of available evidence types")
    estimated_severity: T_ESTIMATE_SEVERITY = Field(description="low, medium, or high")
    # fmt: on

    @field_validator("date_of_loss")
    @classmethod
    def validate_date_of_loss(cls, v: str) -> str:
        """Accept ``'unknown'`` or a zero-padded ``YYYY-MM-DD`` calendar date.

        :param v: raw ``date_of_loss`` value produced by the model.
        :returns: ``v`` unchanged when it is valid.
        :raises ValueError: when ``v`` is neither ``'unknown'`` nor a valid
            ``YYYY-MM-DD`` date.
        """
        if v == "unknown":
            return v
        from datetime import datetime

        message = f"date_of_loss must be 'YYYY-MM-DD' or 'unknown', got '{v}'"
        # strptime alone is too lenient: the leading zero of %m / %d is
        # optional when parsing, so "2024-1-5" would slip through. Pin the
        # exact shape first, then let strptime reject impossible dates
        # such as "2024-02-30".
        if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", v) is None:
            raise ValueError(message)
        try:
            datetime.strptime(v, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError(message) from exc
        return v
MAX_RETRIES = 3
"""Maximum number of converse API calls per :func:`run_p1_extraction` invocation.

This is the total number of attempts, the first call included — not the
number of re-tries after it.

LLM output is non-deterministic — even with a well-crafted prompt, the model
may occasionally return values that violate the output schema (e.g. a date in
``MM/DD/YYYY`` instead of ``YYYY-MM-DD``, or a severity string outside the
allowed enum). Rather than failing immediately, we feed the Pydantic
validation error back to the model as a follow-up user message so it can
self-correct. Three attempts strikes a balance between resilience and cost:
most fixable errors resolve on the second try, and a third guards against
edge cases without runaway API spend.
"""
[docs]
def run_p1_extraction(
    client: "BedrockRuntimeClient",
    data: P1ExtractionUserPromptData,
    prompt_version: str = "01",
    model_id: str = "us.amazon.nova-2-lite-v1:0",
) -> P1ExtractionOutput:
    """Execute the P1 extraction prompt and return validated output.

    **System prompt caching** — The system prompt is static (no Jinja
    variables) by design. This lets us place a ``cachePoint`` after it so
    that Bedrock caches the prefix across calls. When the same system
    prompt is reused — whether across retries within a single
    ``run_p1_extraction()`` call or across independent invocations —
    subsequent requests hit the cache and skip redundant input processing,
    reducing both latency and cost.

    **Why the user prompt is NOT cached** — The user prompt contains the
    per-request FNOL narrative and is different for every claim. Caching
    it would incur a cache-write cost on every call with virtually zero
    chance of a cache hit, making it a net loss. During retries the user
    prompt is already present in the ``messages`` history, so the model
    sees it without any extra caching mechanism.

    **Retry on parse / validation failure** — LLM output is
    non-deterministic. When the reply cannot be turned into a valid
    :class:`P1ExtractionOutput` (JSON decoding error, wrong date format,
    invalid enum value, payload that is not a JSON object), we append the
    model's raw reply as an ``assistant`` message and the error as a
    ``user`` message, then call the API again. This gives the model
    concrete feedback on what went wrong so it can self-correct. We allow
    up to ``MAX_RETRIES`` attempts; if all fail, the last exception is
    re-raised.

    :param client: Bedrock runtime client passed through to ``converse``.
    :param data: values rendered into the user prompt template as ``data``.
    :param prompt_version: version of the UC1-P1 extraction prompt to load.
    :param model_id: Bedrock model identifier passed through to ``converse``.
    :returns: the validated extraction result.
    :raises json.JSONDecodeError: if the final attempt's reply could not be
        decoded (presumably raised by ``extract_json`` — confirm against
        that helper).
    :raises pydantic.ValidationError: if the final attempt's reply decoded
        but did not satisfy :class:`P1ExtractionOutput`.
    """
    prompt = Prompt(id=PromptIdEnum.UC1_P1_EXTRACTION.value, version=prompt_version)
    # System prompt is static — attach a cachePoint so Bedrock can reuse
    # the cached prefix across calls (retries and independent invocations).
    system = [
        {"text": prompt.system_prompt_template.render()},
        {"cachePoint": {"type": "default"}},
    ]
    user_prompt = prompt.user_prompt_template.render(data=data)
    messages: list[dict] = [
        {"role": "user", "content": [{"text": user_prompt}]},
    ]
    for attempt in range(MAX_RETRIES):
        text = converse(client, model_id, system, messages)
        try:
            # Parsing lives inside the try so that an undecodable reply is
            # retried just like a schema violation instead of escaping the
            # loop on the first attempt.
            json_obj = extract_json(text)
            # model_validate (rather than ``**json_obj``) reports a
            # non-object payload (list, string, None) as a ValidationError,
            # which the retry loop can handle, instead of a bare TypeError.
            return P1ExtractionOutput.model_validate(json_obj)
        except (json.JSONDecodeError, ValidationError) as exc:
            if attempt == MAX_RETRIES - 1:
                raise
            # Feed the validation error back so the model can self-correct.
            error_msg = (
                f"Your previous response failed validation:\n{exc}\n\n"
                "Please return a corrected JSON object."
            )
            messages.append({"role": "assistant", "content": [{"text": text}]})
            messages.append({"role": "user", "content": [{"text": error_msg}]})
    # Only reachable if MAX_RETRIES is configured below 1 (loop never runs).
    raise Exception("Should never reach this line of code")  # pragma: no cover