"""
Pradhya · The Capable Series · Day 04 · Capstone
================================================

A working multi-step research agent in ~250 lines.

Reads the question from the CLI, plans 4-6 sub-steps, then iterates the
agent loop (Plan -> Act -> Observe -> Reflect), calling the Claude
Messages API with three tools (web_search, fetch_url, save_briefing)
until it produces a polished 3-paragraph briefing.

Run it:
    python research_agent.py "How is Phoenix planning for the 2027
                              Colorado River cuts?"

Setup:
    pip install -r requirements.txt
    export ANTHROPIC_API_KEY="sk-ant-..."

This is intentionally written so each section of the file maps to one
stage of the loop. Search for `# === PLAN ===` etc. as you read.
"""

from __future__ import annotations

import argparse
import datetime as dt
import html
import json
import os
import pathlib
import re
import sys
import urllib.parse
import urllib.request

from anthropic import Anthropic

# Model ID passed as `model=` on every Messages API call made by the loop.
MODEL = "claude-sonnet-4-6"
MAX_LOOPS = 12          # safety cap; the loop will stop sooner when done
# Directory save_briefing() writes into: a "briefings" folder next to this file.
BRIEFINGS_DIR = pathlib.Path(__file__).parent / "briefings"
# Sent as the User-Agent header by web_search() and fetch_url().
USER_AGENT = "Pradhya-CapableSeries-ResearchAgent/1.0"

# The client is built lazily so that *importing* this module never requires an
# API key. (The eval harness imports run_one; only an actual run needs a key.)
# _get_client() creates it on first use and returns the same object afterwards.
_client: Anthropic | None = None


def _get_client() -> Anthropic:
    """Return the shared Anthropic client, constructing it on first use.

    The instance is cached in the module-level `_client`, so later calls
    hand back the same object instead of building a new one.
    """
    global _client
    if _client is not None:
        return _client
    _client = Anthropic()
    return _client


# Token usage from the most recent _run() — i.e. the last run()/run_one() call.
# The eval harness reads this to compute real cost without having to change
# run_one's clean (answer_text, trace) return signature.
# Keys written by _run(): "input_tokens", "output_tokens", "model".
# Empty until a run has populated it. The dict is mutated in place, never
# rebound, so a reference taken by another module keeps seeing fresh values.
LAST_USAGE: dict = {}


# ---------------------------------------------------------------------------
# === TOOL IMPLEMENTATIONS ===
# These run locally. The model never executes them — it only asks to.
# ---------------------------------------------------------------------------

def web_search(query: str, num_results: int = 5) -> list[dict]:
    """Lightweight DuckDuckGo HTML search (no API key required).

    We intentionally keep this small and dependency-free so the agent runs
    out of the box on a workshop participant's laptop.

    Args:
        query: free-text search query; URL-encoded before it is sent.
        num_results: stop after this many parsed results.

    Returns:
        A list of dicts, each with `title`, `url`, and a `snippet` trimmed to
        300 characters. Failures are reported in-band rather than raised:
        a single-element list `[{"error": "..."}]` is returned when the
        request fails or when nothing could be parsed from the page.
    """
    q = urllib.parse.quote_plus(query)
    url = f"https://duckduckgo.com/html/?q={q}"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(req, timeout=15) as r:
            page = r.read().decode("utf-8", errors="replace")
    except Exception as e:
        # Tool boundary: the model gets an error entry it can react to.
        return [{"error": f"search failed: {e}"}]

    # Naive parse — good enough for a teaching demo. Each match pairs a
    # result link with the next snippet link that follows it in the page.
    pattern = re.compile(
        r'<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>'
        r'.*?<a[^>]+class="result__snippet"[^>]*>(.*?)</a>',
        re.DOTALL,
    )
    results: list[dict] = []
    for m in pattern.finditer(page):
        # The href is an HTML attribute value, so a literal "&" arrives as
        # "&amp;". Decode it before treating the value as a URL; otherwise a
        # direct (non-redirect) link is handed on with "&amp;" in its query.
        href = m.group(1).replace("&amp;", "&")
        # DuckDuckGo wraps the real URL in a redirect; pull it back out.
        if "uddg=" in href:
            wrapped = href.split("uddg=", 1)[1].split("&", 1)[0]
            href = urllib.parse.unquote(wrapped)
        title = _strip_tags(m.group(2)).strip()
        snippet = _strip_tags(m.group(3)).strip()
        # Skip entries with no usable title or a non-absolute/non-web link.
        if not title or not href.startswith("http"):
            continue
        results.append({"title": title, "url": href, "snippet": snippet[:300]})
        if len(results) >= num_results:
            break
    return results or [{"error": "no results parsed"}]


def fetch_url(url: str, max_chars: int = 6000, max_bytes: int = 2_000_000) -> str:
    """Fetch a web page and return a trimmed plain-text version.

    We do not run JavaScript — we treat each page as a server-rendered
    document.

    Args:
        url: absolute http:// or https:// URL to fetch.
        max_chars: maximum length of the returned text, to keep token usage
            reasonable.
        max_bytes: maximum number of response bytes read from the network,
            so an oversized response cannot exhaust memory.

    Returns:
        The page text with tags stripped and blank lines collapsed, cut to
        `max_chars`. Failures are reported in-band rather than raised: the
        return value then starts with "FETCH_ERROR: ".
    """
    # The URL is chosen by the model, which in turn is steered by untrusted
    # web content. urllib will happily open file:// URLs, so restrict the
    # tool to plain web schemes before anything is opened.
    # NOTE: this does not stop requests to loopback/private hosts.
    try:
        scheme = urllib.parse.urlsplit(url).scheme.lower()
    except (AttributeError, TypeError, ValueError) as e:
        return f"FETCH_ERROR: invalid URL: {e}"
    if scheme not in ("http", "https"):
        return (
            f"FETCH_ERROR: unsupported URL scheme {scheme!r}; "
            "only http and https are allowed"
        )

    try:
        req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
        with urllib.request.urlopen(req, timeout=20) as r:
            payload = r.read(max_bytes)
            # Honour the charset from the Content-Type header when present.
            charset = r.headers.get_content_charset() or "utf-8"
    except Exception as e:
        # Tool boundary: the model gets an error string it can react to.
        return f"FETCH_ERROR: {e}"

    try:
        raw = payload.decode(charset, errors="replace")
    except LookupError:
        # The server advertised a codec Python does not know.
        raw = payload.decode("utf-8", errors="replace")

    text = _strip_tags(raw)
    text = re.sub(r"\s+\n", "\n", text)      # drop trailing whitespace and blank-line padding
    text = re.sub(r"\n{3,}", "\n\n", text)   # at most one empty line in a row
    return text.strip()[:max_chars]


def save_briefing(title: str, body: str) -> str:
    """Persist the final briefing under ./briefings/ with a timestamp.

    The file is Markdown: the title as a level-1 heading, a blank line, then
    the body. Its name is `<YYYY-MM-DD-HHMM>-<slug>.md`, where the slug is the
    lower-cased title with every run of non-alphanumerics replaced by "-"
    (cut to 60 characters, "briefing" if nothing is left).

    An existing file is never overwritten: if the name is already taken —
    e.g. the same question saved twice within one minute — "-2", "-3", ...
    is appended to the name until a free one is found.

    Args:
        title: briefing title; used for the heading and the file name.
        body: briefing text, written below the heading.

    Returns:
        The path of the file that was written, as a string.

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    BRIEFINGS_DIR.mkdir(parents=True, exist_ok=True)
    slug = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-")[:60] or "briefing"
    stamp = dt.datetime.now().strftime("%Y-%m-%d-%H%M")
    stem = f"{stamp}-{slug}"
    content = f"# {title}\n\n{body}\n"

    attempt = 1
    while True:
        name = f"{stem}.md" if attempt == 1 else f"{stem}-{attempt}.md"
        path = BRIEFINGS_DIR / name
        try:
            # Mode "x" fails if the file exists, so an earlier briefing with
            # the same stamp and slug is kept instead of silently replaced.
            with path.open("x", encoding="utf-8") as fh:
                fh.write(content)
        except FileExistsError:
            attempt += 1
        else:
            return str(path)


def _strip_tags(s: str) -> str:
    s = re.sub(r"<script.*?</script>", " ", s, flags=re.DOTALL | re.IGNORECASE)
    s = re.sub(r"<style.*?</style>", " ", s, flags=re.DOTALL | re.IGNORECASE)
    s = re.sub(r"<[^>]+>", " ", s)
    s = re.sub(r"&nbsp;", " ", s)
    s = re.sub(r"&amp;", "&", s)
    s = re.sub(r"&lt;", "<", s)
    s = re.sub(r"&gt;", ">", s)
    return s


# Dispatch table: tool name (as declared in TOOLS) -> local implementation.
# Each adapter receives the model-supplied input as keyword arguments and
# forwards only the fields its tool uses.
TOOL_FNS = {
    "web_search": lambda **kwargs: web_search(
        kwargs["query"], kwargs.get("num_results", 5)
    ),
    "fetch_url": lambda **kwargs: fetch_url(kwargs["url"]),
    "save_briefing": lambda **kwargs: save_briefing(
        kwargs["title"], kwargs["body"]
    ),
}


# ---------------------------------------------------------------------------
# === TOOL DECLARATIONS ===
# These are the only things the model sees. The descriptions are how the
# model decides when to call each tool — they matter more than the code.
# ---------------------------------------------------------------------------

# Tool schemas passed as `tools=` on every Messages API call in _run().
# Each `name` must match a key in TOOL_FNS: that is how a tool call requested
# by the model is dispatched to its local implementation.
TOOLS = [
    # Discovery: maps to web_search(query, num_results).
    {
        "name": "web_search",
        "description": (
            "Search the public web for a query and return a list of "
            "result titles, URLs, and short snippets. Use this first "
            "when you need recent information or to discover authoritative "
            "sources. Returns up to num_results items (default 5)."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {"type": "string"},
                "num_results": {"type": "integer", "minimum": 1, "maximum": 10},
            },
            "required": ["query"],
        },
    },
    # Reading: maps to fetch_url(url). The "~6000 characters" in the
    # description mirrors fetch_url's max_chars default.
    {
        "name": "fetch_url",
        "description": (
            "Fetch a URL and return its plain-text content (HTML tags "
            "stripped, trimmed to ~6000 characters). Use this after "
            "web_search to read the most authoritative two or three "
            "results in full. Don't fetch every result — pick the ones "
            "most likely to repay the cost."
        ),
        "input_schema": {
            "type": "object",
            "properties": {"url": {"type": "string"}},
            "required": ["url"],
        },
    },
    # Output: maps to save_briefing(title, body), which writes the file.
    {
        "name": "save_briefing",
        "description": (
            "Save the final polished briefing to disk. Call this exactly "
            "once at the end, when you are confident the briefing is "
            "ready. After calling this tool, return a one-sentence "
            "summary of what you saved and stop."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "body":  {"type": "string"},
            },
            "required": ["title", "body"],
        },
    },
]


# ---------------------------------------------------------------------------
# === SYSTEM PROMPT ===
# The whole agent's personality and discipline live here. This is where
# the curriculum from Day 02 (Role + Constraint + Critique) gets baked in.
# ---------------------------------------------------------------------------

# Sent verbatim as `system=` on every Messages API call in _run(). The text is
# runtime behavior, not documentation: the bracketed labels it asks for are
# what shows up in the agent's printed narration, and the save_briefing rule
# refers to the tool of that name declared in TOOLS.
SYSTEM_PROMPT = """You are Pradhya's Research Agent.

You take a research question and produce a tight, sourced briefing.
Your output is three paragraphs, ~500 words total, the kind of thing
a busy executive would read on the way to a meeting.

Follow this loop, explicitly. At the start of each turn, output a
one-line label so the user can watch you think:

  [PLAN] ...      — when you are deciding the next step
  [ACT] ...       — right before you call a tool
  [OBSERVE] ...   — when you have just read a tool result
  [REFLECT] ...   — when you are deciding whether you have enough

Rules:
- Search first, read second, draft third. Do not skip steps.
- Read no more than 4 URLs total. Pick the most authoritative.
- Quote a specific fact, with its source URL, at least twice in the
  briefing.
- After you draft, critique your own draft. List three things a
  thoughtful skeptic would push back on. Then revise.
- If you are not sure about a specific number or date, say so in the
  briefing rather than guessing.
- End by calling save_briefing exactly once. Then write a single
  sentence summary of what you saved, and stop.

Voice: claim-first. Plain words. No "leverage," "robust," or
"transformative." Trust the reader's intelligence.
"""


# ---------------------------------------------------------------------------
# === THE LOOP ===
# Plan -> Act -> Observe -> Reflect, until the model is done.
# ---------------------------------------------------------------------------

def _run(question: str, verbose: bool = True) -> tuple[str, list[dict]]:
    """Run the agent loop once.

    Each turn sends the conversation so far to the Messages API. If the model
    asks for tools, every requested tool is executed locally and the results
    are fed back as the next user message; any other stop reason ends the
    loop. The loop also ends after MAX_LOOPS turns.

    Args:
        question: the research question; sent as the first user message.
        verbose: gates the human-facing prints used by the CLI (the goal,
            the model's text, each tool call, and why the loop stopped).

    Returns:
        (answer_text, trace):
          - answer_text : all of the model's text, joined — the watchable
            [PLAN]/[ACT]/[OBSERVE]/[REFLECT] narration plus the final summary.
          - trace       : list of {"tool": name, "input": {...}}, one entry
            per tool the model called, in order.

    Raises:
        Whatever the API client raises. A failing tool does not raise: the
        error is sent back to the model as an error tool result.

    Side effect: total token usage for the run is left in module-global
    LAST_USAGE so callers (the eval harness) can read real cost. It is
    written even when the run fails part-way, so LAST_USAGE never shows a
    previous run's totals after a crash.
    """
    if verbose:
        print(f"\n[goal] {question}\n")

    messages: list[dict] = [{"role": "user", "content": question}]
    trace: list[dict] = []
    texts: list[str] = []
    in_tokens = out_tokens = 0

    # Forget the previous run's totals before any API call can fail.
    LAST_USAGE.clear()
    try:
        for _ in range(MAX_LOOPS):
            resp = _get_client().messages.create(
                model=MODEL,
                system=SYSTEM_PROMPT,
                max_tokens=2048,
                tools=TOOLS,
                messages=messages,
            )

            # Accumulate real token usage across every turn of the loop.
            usage = getattr(resp, "usage", None)
            if usage is not None:
                in_tokens += getattr(usage, "input_tokens", 0) or 0
                out_tokens += getattr(usage, "output_tokens", 0) or 0

            # Capture any text the model produced (and echo it when verbose).
            for block in resp.content:
                if block.type == "text" and block.text.strip():
                    texts.append(block.text.rstrip())
                    if verbose:
                        print(block.text.rstrip())

            # The assistant turn goes into the history whether or not it
            # asked for tools.
            messages.append({"role": "assistant", "content": resp.content})
            if resp.stop_reason != "tool_use":
                if verbose:
                    if resp.stop_reason == "max_tokens":
                        # The reply was truncated, not finished — say so
                        # instead of claiming a natural stop.
                        print(
                            "\n[stop] reply was cut off at max_tokens — "
                            "the agent didn't finish.\n"
                        )
                    else:
                        print("\n[done] agent halted naturally.\n")
                break

            # Otherwise, execute every tool the model asked for, record it in
            # the trace, and feed the results back as the next user turn.
            tool_results: list[dict] = []
            for block in resp.content:
                if block.type != "tool_use":
                    continue
                name, args = block.name, block.input
                trace.append({"tool": name, "input": args})
                if verbose:
                    print(f"  → tool {name}({_short(args)})")
                try:
                    output = TOOL_FNS[name](**args)
                    failed = False
                except Exception as e:
                    # Covers unknown tool names and bad arguments as well as
                    # errors inside the tool; the model sees the message.
                    output = f"ERROR: {type(e).__name__}: {e}"
                    failed = True
                result = {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": _stringify(output),
                }
                if failed:
                    # Tell the API this result is an error, not tool output.
                    result["is_error"] = True
                tool_results.append(result)
            messages.append({"role": "user", "content": tool_results})
        else:
            # Loop exhausted MAX_LOOPS without a natural stop.
            if verbose:
                print("\n[stop] hit MAX_LOOPS — the agent didn't finish.\n")
    finally:
        LAST_USAGE.update(
            {"input_tokens": in_tokens, "output_tokens": out_tokens, "model": MODEL}
        )
    return "\n".join(texts), trace


def run(question: str) -> None:
    """Answer `question` from the command line, printing progress as it goes.

    The (answer_text, trace) pair produced by the loop is discarded; what is
    printed to stdout is the result.
    """
    _run(question=question, verbose=True)


def run_one(question: str) -> tuple[str, list[dict]]:
    """Run the agent on `question` without printing anything.

    Meant for evals. Returns (answer_text, trace), where trace holds one
    {"tool": name, "input": {...}} entry per tool call, in call order.
    Token totals for the run are available in LAST_USAGE afterwards.
    """
    answer_text, trace = _run(question=question, verbose=False)
    return answer_text, trace


# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

def _short(args: dict, n: int = 80) -> str:
    s = json.dumps(args, ensure_ascii=False)
    return s if len(s) <= n else s[:n - 1] + "…"


def _stringify(x) -> str:
    if isinstance(x, str):
        return x
    return json.dumps(x, ensure_ascii=False, indent=None)[:8000]


def main() -> None:
    """Parse the command line and run the agent on the given question.

    All positional arguments are joined into one question, so it may be
    passed quoted or unquoted. Runs of whitespace — including the newline
    and indentation left by a quoted question wrapped over several shell
    lines — are collapsed to single spaces.

    Exits with a usage error if the question is blank, and with a message
    if ANTHROPIC_API_KEY is not set in the environment.
    """
    parser = argparse.ArgumentParser(description="Pradhya research agent")
    parser.add_argument("question", nargs="+", help="The research question")
    args = parser.parse_args()
    question = " ".join(" ".join(args.question).split())
    if not question:
        # nargs="+" guarantees an argument, not a non-blank one ('""', "  ").
        parser.error("the research question must not be empty")
    if not os.environ.get("ANTHROPIC_API_KEY"):
        sys.exit("ANTHROPIC_API_KEY is not set. Get one at console.anthropic.com.")
    run(question)


if __name__ == "__main__":
    main()