Source code for prompt_risk.judges.j1_over_permissive

# -*- coding: utf-8 -*-

"""
J1 Over-Permissive Authorization Judge.

Evaluates a prompt's system prompt text (and optionally its user prompt
template) for over-permissive authorization risks.  The judge itself is a
prompt — it uses an LLM to perform semantic analysis against five criteria
defined in its own system prompt template.

This module is **use-case-agnostic**.  It accepts raw prompt text as strings
and knows nothing about FNOL, claims, or any specific business domain.
Use-case-specific wrappers (e.g. ``uc.uc1.j1_uc1_p1``) handle loading
prompt files and calling this function.
"""

import typing as T
import json
import re

from pydantic import BaseModel, Field, ValidationError

from ..constants import PromptIdEnum
from ..prompts import Prompt
from ..llm_output import extract_json
from ..bedrock_utils import converse

if T.TYPE_CHECKING:
    from mypy_boto3_bedrock_runtime import BedrockRuntimeClient


# ---------------------------------------------------------------------------
# Input / Output models
# ---------------------------------------------------------------------------
class J1UserPromptData(BaseModel):
    """
    Payload rendered into the J1 judge's user prompt template.

    Carries the raw text of the prompt under evaluation.  Only the system
    prompt is required; the user prompt template may be omitted.
    """

    # Text of the system prompt being judged (required).
    target_system_prompt: str
    # Text of the matching user prompt template; defaults to ``None``.
    target_user_prompt_template: T.Union[str, None] = None
# Labels accepted for the ``severity`` of a single finding.
T_SEVERITY = T.Literal[
    "major",
    "minor",
    "pass",
]

# Labels accepted for the ``overall_risk`` of a complete evaluation.
T_OVERALL_RISK = T.Literal[
    "critical",
    "high",
    "medium",
    "low",
    "pass",
]
class J1Finding(BaseModel):
    """
    A single criterion-level finding from the J1 judge.

    All five fields are required.  ``severity`` is restricted to the
    ``T_SEVERITY`` literals; the remaining fields are free-form strings.
    """

    # Which criterion this finding refers to.
    # NOTE(review): presumably one of the five criteria defined in the judge's
    # system prompt template — the exact identifiers are not visible here.
    criterion: str
    # One of "major", "minor" or "pass".
    severity: T_SEVERITY
    # NOTE(review): presumably text quoted from the evaluated prompt — confirm
    # against the judge prompt template.
    evidence: str
    explanation: str
    recommendation: str
class J1Result(BaseModel):
    """
    Structured outcome of one J1 judge evaluation.

    Every field is required.  ``score`` must be an integer between 1 and 5
    inclusive, and ``overall_risk`` must be one of the ``T_OVERALL_RISK``
    literals.
    """

    # Overall verdict label for the evaluated prompt.
    overall_risk: T_OVERALL_RISK
    # Required integer, validated to lie within 1..5 (both ends included).
    score: int = Field(..., ge=1, le=5)
    # Per-criterion findings reported by the judge.
    findings: T.List[J1Finding]
    # Free-form summary text.
    summary: str
# --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- MAX_RETRIES = 3 # --------------------------------------------------------------------------- # Main entry point # ---------------------------------------------------------------------------
def run_j1_over_permissive(
    client: "BedrockRuntimeClient",
    data: J1UserPromptData,
    judge_version: str = "01",
    model_id: str = "us.amazon.nova-2-lite-v1:0",
) -> J1Result:
    """Evaluate a prompt for over-permissive authorization risks.

    The judge prompt is loaded by id and version, rendered, and sent to the
    model.  The reply is parsed into a :class:`J1Result`.  When parsing or
    validation fails, the failed reply and the error text are appended to the
    conversation and the model is asked again, up to ``MAX_RETRIES`` calls in
    total.

    Parameters
    ----------
    client:
        Bedrock Runtime client.
    data:
        The target prompt texts to evaluate.
    judge_version:
        Which version of the J1 judge prompt to use.
    model_id:
        Bedrock model ID for the judge LLM.

    Returns
    -------
    J1Result
        Structured evaluation result with overall risk, score, findings,
        and summary.

    Raises
    ------
    json.JSONDecodeError, TypeError, pydantic.ValidationError
        Re-raised unchanged when the final attempt still does not produce a
        valid result.
    """
    judge_prompt = Prompt(
        id=PromptIdEnum.JUDGE_J1_OVER_PERMISSIVE.value,
        version=judge_version,
    )
    # The system prompt is rendered without arguments, so it is identical on
    # every call; a cache point block follows it.
    # NOTE(review): presumably this enables Bedrock prompt caching for the
    # static judge instructions — confirm against ``converse``.
    system = [
        {"text": judge_prompt.system_prompt_template.render()},
        {"cachePoint": {"type": "default"}},
    ]
    user_prompt = judge_prompt.user_prompt_template.render(data=data)
    messages: list[dict] = [
        {"role": "user", "content": [{"text": user_prompt}]},
    ]

    for attempt in range(MAX_RETRIES):
        text = converse(client, model_id, system, messages)
        try:
            # Extraction happens inside the ``try`` so that a reply which is
            # not parseable JSON goes through the same corrective retry as a
            # schema violation, instead of aborting on the first attempt.
            json_obj = extract_json(text)
            # ``**`` unpacking raises TypeError when the extracted JSON is not
            # an object (e.g. a list); that is also treated as retryable.
            return J1Result(**json_obj)
        except (json.JSONDecodeError, TypeError, ValidationError) as exc:
            if attempt == MAX_RETRIES - 1:
                # Out of attempts: surface the original error to the caller.
                raise
            error_msg = (
                f"Your previous response failed validation:\n{exc}\n\n"
                "Please return a corrected JSON object."
            )
            # Feed the failed reply and the error back so the model can
            # correct itself on the next call.
            messages.append({"role": "assistant", "content": [{"text": text}]})
            messages.append({"role": "user", "content": [{"text": error_msg}]})

    raise Exception("Should never reach this line of code")  # pragma: no cover
# --------------------------------------------------------------------------- # Pretty-print # --------------------------------------------------------------------------- _SEVERITY_ICON = {"pass": "✅", "minor": "⚠️", "major": "❌"} _RISK_ICON = {"pass": "✅", "low": "🟢", "medium": "🟡", "high": "🟠", "critical": "🔴"}