Source code for prompt_risk.llm_output
# -*- coding: utf-8 -*-
"""
LLM output post-processing utilities.
Reusable helpers for cleaning and extracting structured data from raw LLM
text responses.
"""
import json
import re
import typing as T
from .exc import JsonExtractionError
[docs]
def extract_json(text: str) -> T.Any:
    """Extract and parse a single JSON value from raw LLM response text.

    The response is expected to contain exactly **one** JSON value, either
    bare or wrapped in a markdown code fence (a triple-backtick fence with
    an optional ``json`` language tag, matched case-insensitively).

    Candidate substrings are tried in order and the first one that parses
    wins:

    1. The content of the first fenced block (text outside it is ignored).
    2. If that fails and a later fence exists, the span from the first
       opening fence to the *last* fence. This covers fenced JSON whose
       string values themselves contain triple backticks.
    3. The entire *text*. This is the only candidate when no fence is
       found, and it also rescues bare JSON whose string values contain
       markdown fences.

    This function does **not** handle multiple JSON values in a single
    response; at most one value is returned.

    Parameters
    ----------
    text:
        Raw LLM response text, potentially wrapped in markdown code fences.

    Returns
    -------
    Any
        The parsed JSON value (typically a ``dict`` or ``list``).

    Raises
    ------
    JsonExtractionError
        If none of the candidates is valid JSON. The exception is built
        with the full raw LLM output (``raw_output``) and the parse error
        of the first candidate (``cause``), which is also chained as
        ``__cause__`` for downstream inspection.
    """
    # DOTALL lets the capture span multiple lines; IGNORECASE accepts
    # language tags such as "JSON" or "Json" as well as "json".
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```",
        text,
        re.DOTALL | re.IGNORECASE,
    )

    candidates: T.List[str] = []
    if fenced:
        candidates.append(fenced.group(1))
        # The lazy match stops at the first closing fence. When a later
        # fence exists, the JSON itself may contain backticks, so also try
        # everything up to the last fence.
        last_fence = text.rfind("```")
        if last_fence > fenced.end() - 3:
            candidates.append(text[fenced.start(1):last_fence])
    # Always keep the whole text as the last resort.
    candidates.append(text)

    first_error: T.Optional[ValueError] = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            # Report the failure of the primary candidate, not of a fallback.
            if first_error is None:
                first_error = exc
    raise JsonExtractionError(raw_output=text, cause=first_error) from first_error