"""
Pradhya · NanoClaw Workshop · Unit 06
======================================

A minimal NanoClaw-style "second brain" in a single file. Implements the
four operations from the LLM Wiki pattern:

    ingest  — clean a source and file structured knowledge into the wiki
    query   — answer a question by reading the wiki
    file    — write or merge a wiki page (used by ingest)
    log     — append to the operation journal

Wiki layout:

    wiki/
    ├── index.md
    ├── log.md
    ├── people/
    ├── concepts/
    ├── events/
    └── ...

This is a teaching version. A production second-brain ships more polish:
connectors, scheduler, agent loop, retries. Read this file
to understand the *shape*; reach for the real repo when you go to
production.

Install:
    pip install anthropic

Run:
    export ANTHROPIC_API_KEY="sk-ant-..."
    python nanoclaw_demo.py ingest https://example.com/some-article
    python nanoclaw_demo.py query "what do I know about X?"
    python nanoclaw_demo.py index
"""

from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import pathlib
import re
import sys
import urllib.request

from anthropic import Anthropic

# ---------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------
# Root directory of the wiki; defaults to ./wiki and can be overridden with
# the WIKI_ROOT environment variable ("~" is expanded).
WIKI_ROOT  = pathlib.Path(os.environ.get("WIKI_ROOT", "./wiki")).expanduser()
# Model identifier passed to every client.messages.create() call in this file.
MODEL      = "claude-sonnet-4-6"
# User-Agent header sent by fetch_url() when downloading a source.
USER_AGENT = "NanoClaw-demo/1.0"

# Shared API client, constructed once at import time and used by ingest() and query().
client = Anthropic()


# ---------------------------------------------------------------------
# Wiki primitives
# ---------------------------------------------------------------------
def _ensure_wiki() -> None:
    """Create the wiki root and seed index.md / log.md if they are missing.

    Existing files are left untouched, so calling this repeatedly is safe.
    """
    WIKI_ROOT.mkdir(parents=True, exist_ok=True)
    seeds = (
        ("index.md", "# Wiki Index\n\n_(empty — feed me sources)_\n"),
        ("log.md", "# Wiki Operation Log\n\n"),
    )
    for filename, initial_text in seeds:
        target = WIKI_ROOT / filename
        if target.exists():
            continue
        target.write_text(initial_text, encoding="utf-8")


def log(line: str) -> None:
    """Append ``line`` to the wiki's log.md as a timestamped bullet.

    The timestamp is the local time in ISO format, to the second.
    """
    timestamp = dt.datetime.now().isoformat(timespec="seconds")
    entry = f"- {timestamp}  {line}\n"
    journal = WIKI_ROOT / "log.md"
    with journal.open("a", encoding="utf-8") as handle:
        handle.write(entry)


def _slugify(name: str) -> str:
    s = re.sub(r"[^a-zA-Z0-9]+", "-", name.lower()).strip("-")
    return s[:60] or "untitled"


def file_page(category: str, name: str, body: str, sources: list[str]) -> pathlib.Path:
    """Write or merge a wiki page and return its path.

    A new page is written as frontmatter followed by ``body``. When the page
    already exists, merging is naive: the old body is kept and ``body`` is
    appended under a dated "Update" heading. Real NanoClaw uses a smarter
    merger.

    Sources already listed in the existing frontmatter are carried over (and
    de-duplicated), so a merge never drops provenance recorded earlier.

    Args:
        category: Sub-folder of the wiki the page lives in (e.g. "people").
        name: Human-readable title; also slugified into the file name.
        body: Markdown content to write or append.
        sources: Sources backing this content, recorded in the frontmatter.

    Returns:
        Path of the page that was written.
    """
    folder = WIKI_ROOT / category
    folder.mkdir(parents=True, exist_ok=True)
    path = folder / f"{_slugify(name)}.md"

    today = dt.date.today().isoformat()
    # The frontmatter is line-oriented: a newline inside the title would
    # corrupt it, and an empty title makes the title regex used by the index
    # builder capture the following line instead.
    title = " ".join(name.split()) or "untitled"

    previous_body = None
    previous_sources = []
    if path.exists():
        existing = path.read_text(encoding="utf-8")
        # Match the leading frontmatter only; the closing fence must sit at
        # the start of its own line so a "---" inside a value cannot end it.
        fm = re.match(r"---\n(.*?)^---[ \t]*\n+", existing,
                      flags=re.DOTALL | re.MULTILINE)
        if fm:
            previous_sources = re.findall(r"^  - (.+)$", fm.group(1),
                                          flags=re.MULTILINE)
            previous_body = existing[fm.end():]
        else:
            previous_body = existing

    # Old sources first, new ones after; dict keys preserve order and dedupe.
    all_sources = list(dict.fromkeys(previous_sources + list(sources)))
    header = (
        "---\n"
        f"title: {title}\n"
        f"type: {category}\n"
        f"updated: {today}\n"
        "sources:\n"
        + "".join(f"  - {s}\n" for s in all_sources)
        + "---\n\n"
    )

    if previous_body is not None:
        merged = header + previous_body + f"\n\n## Update · {today}\n\n{body}\n"
        path.write_text(merged, encoding="utf-8")
        log(f"merged page  {category}/{path.name}")
    else:
        path.write_text(header + body + "\n", encoding="utf-8")
        log(f"created page {category}/{path.name}")
    _refresh_index()
    return path


def _refresh_index() -> None:
    """Regenerate index.md from the category folders currently in the wiki.

    Every non-hidden sub-directory that contains at least one ``*.md`` file
    becomes a section listing links to its pages, in sorted order.
    """
    today = dt.date.today().isoformat()
    out = ["# Wiki Index", "", f"_Updated: {today}_", ""]

    for entry in sorted(WIKI_ROOT.iterdir()):
        if entry.name.startswith(".") or not entry.is_dir():
            continue
        page_links = [
            f"- [{_read_title(page)}]({page.relative_to(WIKI_ROOT)})"
            for page in sorted(entry.glob("*.md"))
        ]
        if page_links:
            out.append(f"## {entry.name.capitalize()}")
            out.extend(page_links)
            out.append("")

    index_path = WIKI_ROOT / "index.md"
    index_path.write_text("\n".join(out), encoding="utf-8")


def _read_title(path: pathlib.Path) -> str:
    text = path.read_text(encoding="utf-8", errors="replace")
    m = re.search(r"^title:\s*(.+)$", text, flags=re.MULTILINE)
    return m.group(1).strip() if m else path.stem


def index_text() -> str:
    """Return the full text of the wiki's index.md."""
    index_path = WIKI_ROOT / "index.md"
    return index_path.read_text(encoding="utf-8")


# ---------------------------------------------------------------------
# Source fetching + de-cluttering
# ---------------------------------------------------------------------
def fetch_url(url: str) -> str:
    """Download ``url`` and return its de-cluttered text.

    The body is decoded with the charset declared in the response's
    Content-Type header, falling back to UTF-8 when none is declared or the
    declared name is unknown to Python. Undecodable bytes are replaced.

    Errors raised by urllib (bad URL, network failure, HTTP error status,
    timeout) are not caught here and propagate to the caller.
    """
    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(req, timeout=30) as r:
        payload = r.read()
        charset = r.headers.get_content_charset() or "utf-8"
    try:
        raw = payload.decode(charset, errors="replace")
    except LookupError:
        # The server named an encoding Python has no codec for.
        raw = payload.decode("utf-8", errors="replace")
    return _defuddle(raw)


def _defuddle(html: str) -> str:
    """Strip script/style/nav/footer/header tags; collapse whitespace.
    A toy version of the defuddle skill — good enough for blog posts."""
    html = re.sub(r"<(script|style|nav|footer|header|aside).*?</\1>", " ",
                  html, flags=re.DOTALL | re.IGNORECASE)
    html = re.sub(r"<[^>]+>", " ", html)
    html = re.sub(r"&nbsp;", " ", html)
    html = re.sub(r"&amp;", "&", html)
    html = re.sub(r"\s+", " ", html)
    return html.strip()[:18000]   # cap so we don't blow the prompt budget


# ---------------------------------------------------------------------
# The two skills: ingest + query
# ---------------------------------------------------------------------
# System prompt for ingest(). It deliberately ends with the
# "EXISTING WIKI INDEX:" heading because ingest() appends the current
# index.md text right after it.
INGEST_SYSTEM = """You file a new source into the user's personal wiki.

Read the source. Then output ONE JSON object with these keys:

  "entities":  [ {"name", "type", "summary"} for each distinct person, place, org, or product worth its own page ]
  "concepts":  [ {"name", "summary", "related"} for each idea worth its own page ]
  "claims":    [ {"claim", "source_quote", "confidence"} for facts worth remembering ]
  "tags":      [list of free-text tags for cross-filing]

Rules:
- Be terse. Skip filler.
- Quote source spans verbatim only for load-bearing facts.
- Reuse existing wiki page names when they fit (see the index below).
- If the source has nothing worth filing, return empty arrays.

EXISTING WIKI INDEX:
"""


def _records(value) -> list:
    """Return the dict items of ``value`` if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def ingest(url: str) -> None:
    """Fetch ``url``, have the model extract knowledge, and file it in the wiki.

    Entities go to ``people/`` (type "person") or ``entities/``, concepts to
    ``concepts/``, and all claims to one dated page under ``claims/``. A
    summary line is appended to the operation log.

    The model's JSON is treated as untrusted: a non-object reply aborts the
    ingest with a message, and individual items that are not objects or lack
    a name/claim are skipped instead of raising. Fetch failures and
    unparseable replies are reported on stdout and the function returns
    without filing anything.
    """
    _ensure_wiki()
    print(f"[ingest] fetching {url}")
    try:
        text = fetch_url(url)
    except (OSError, ValueError) as e:
        # urllib's network/HTTP errors derive from OSError; a malformed URL
        # raises ValueError.
        print(f"[ingest] could not fetch {url}: {e}")
        return
    if len(text) < 200:
        print(f"[ingest] source too small ({len(text)} chars); skipping")
        return

    print(f"[ingest] cleaned to {len(text)} chars; sending to Claude")
    resp = client.messages.create(
        model=MODEL,
        max_tokens=2048,
        system=INGEST_SYSTEM + "\n" + index_text(),
        messages=[{"role": "user", "content": f"SOURCE: {url}\n\n{text}"}],
    )

    # Join every text block instead of assuming the first block is text.
    raw = "".join(b.text for b in resp.content if b.type == "text").strip()
    raw = _extract_json(raw)
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f"[ingest] could not parse JSON from model: {e}")
        if resp.stop_reason == "max_tokens":
            print("[ingest] the reply hit max_tokens, so the JSON is probably truncated")
        print(raw[:400])
        return
    if not isinstance(parsed, dict):
        print(f"[ingest] expected a JSON object from model, got {type(parsed).__name__}")
        return

    n_entities = 0
    for ent in _records(parsed.get("entities")):
        name = str(ent.get("name") or "").strip()
        if not name:
            continue
        kind = str(ent.get("type") or "entity")
        body = f"**{kind.title()}**.\n\n{ent.get('summary') or ''}\n"
        category = "people" if kind.strip().lower() == "person" else "entities"
        file_page(category, name, body, [url])
        n_entities += 1

    n_concepts = 0
    for con in _records(parsed.get("concepts")):
        name = str(con.get("name") or "").strip()
        if not name:
            continue
        body = str(con.get("summary") or "")
        related = con.get("related") or []
        if isinstance(related, str):
            # A bare string would otherwise be iterated character by character.
            related = [related]
        if related:
            body += "\n\n**See also:** " + ", ".join(f"[[{r}]]" for r in related)
        file_page("concepts", name, body, [url])
        n_concepts += 1

    claims = [c for c in _records(parsed.get("claims")) if c.get("claim")]
    if claims:
        claims_body = "\n".join(
            f"- {c['claim']}  \n  > {c.get('source_quote', '')}  \n  _(confidence: {c.get('confidence', '?')})_"
            for c in claims
        )
        page_name = dt.date.today().isoformat() + "-claims"
        file_page("claims", page_name, claims_body, [url])

    log(f"ingested {url}  →  {n_entities} entities, "
        f"{n_concepts} concepts, "
        f"{len(claims)} claims")
    # Point at the real log location; WIKI_ROOT may not be ./wiki.
    print(f"[ingest] done. see {WIKI_ROOT / 'log.md'}")


# System prompt for query(). It deliberately ends with the
# "EXISTING WIKI INDEX:" heading because query() appends the current index.md
# text right after it. The `read_wiki_page` tool named here is the one
# declared in query()'s tool list.
QUERY_SYSTEM = """You answer questions using the user's wiki.

The wiki index is below. First, pick the 1-3 pages most likely to
contain the answer. Then ask to read them via the `read_wiki_page` tool.
After reading, compose a tight answer that cites the page names you used.

If the wiki does not contain the answer, say so and suggest a search query
the user could feed back as an ingest.

EXISTING WIKI INDEX:
"""


def read_wiki_page(rel_path: str) -> str:
    """Return the text of the wiki page at ``rel_path`` (relative to the wiki root).

    ``rel_path`` is chosen by the model, so it is treated as untrusted: after
    resolving symlinks and ``..`` segments the target must still lie inside
    the wiki root. Any failure is reported as an ``"ERROR: ..."`` string
    rather than an exception, so it can be handed back as a tool result.
    """
    try:
        root = WIKI_ROOT.resolve()
        # Joining with an absolute rel_path discards the root entirely, which
        # is why containment is checked on the resolved result below.
        path = (root / rel_path).resolve()
        path.relative_to(root)
    except (ValueError, OSError):
        return f"ERROR: {rel_path} is outside the wiki"
    if not path.is_file():
        return f"ERROR: no such page at {rel_path}"
    return path.read_text(encoding="utf-8", errors="replace")


def query(question: str, max_rounds: int = 6) -> None:
    """Answer ``question`` from the wiki and print the answer.

    The model sees the wiki index and may call the ``read_wiki_page`` tool to
    fetch pages. Each loop iteration is one model call; the loop ends as soon
    as the model stops asking for tools.

    Args:
        question: The user's question.
        max_rounds: Upper bound on model calls, so a model that keeps
            requesting pages cannot loop forever.
    """
    _ensure_wiki()
    tools = [
        {
            "name": "read_wiki_page",
            "description": "Read a wiki page by its relative path (e.g. 'people/avery-stone.md'). "
                           "Use this after consulting the index to fetch the actual page content.",
            "input_schema": {
                "type": "object",
                "properties": {"rel_path": {"type": "string"}},
                "required": ["rel_path"],
            },
        }
    ]
    messages = [{"role": "user", "content": question}]
    system = QUERY_SYSTEM + "\n" + index_text()

    for _ in range(max_rounds):
        resp = client.messages.create(
            model=MODEL, max_tokens=1500,
            system=system, tools=tools, messages=messages,
        )
        messages.append({"role": "assistant", "content": resp.content})
        if resp.stop_reason != "tool_use":
            for b in resp.content:
                if b.type == "text":
                    print(b.text)
            return
        tool_results = []
        for b in resp.content:
            if b.type != "tool_use":
                continue
            rel_path = b.input.get("rel_path")
            print(f"  → read_wiki_page({rel_path})")
            if isinstance(rel_path, str) and rel_path:
                content = read_wiki_page(rel_path)
            else:
                # Every tool_use needs a tool_result; report the bad call to
                # the model instead of crashing on the missing key.
                content = "ERROR: missing required string argument 'rel_path'"
            tool_results.append({"type": "tool_result", "tool_use_id": b.id, "content": content})
        messages.append({"role": "user", "content": tool_results})

    # Reached only when every round ended in another tool request.
    print(f"[query] no final answer after {max_rounds} rounds of page reads; "
          "try a narrower question")


# ---------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------
def _extract_json(s: str) -> str:
    """Strip ```json fences if present."""
    s = s.strip()
    m = re.search(r"```(?:json)?\s*(.*?)\s*```", s, flags=re.DOTALL)
    return m.group(1) if m else s


def main() -> None:
    """Parse the command line and dispatch to ingest, query or index.

    The API key is required only for ``ingest`` and ``query``; ``index``
    just prints a local file and works without one.
    """
    p = argparse.ArgumentParser(description="NanoClaw-style minimal second brain")
    sub = p.add_subparsers(dest="cmd", required=True)
    p_ing = sub.add_parser("ingest", help="File a source into the wiki")
    p_ing.add_argument("url")
    p_qry = sub.add_parser("query", help="Answer a question from the wiki")
    # nargs="+" lets the question be typed without quotes; words are re-joined below.
    p_qry.add_argument("question", nargs="+")
    sub.add_parser("index", help="Print the current wiki index")

    args = p.parse_args()
    needs_api = args.cmd in ("ingest", "query")
    if needs_api and not os.environ.get("ANTHROPIC_API_KEY"):
        sys.exit("ANTHROPIC_API_KEY is not set. Get one at console.anthropic.com.")

    if args.cmd == "ingest":
        ingest(args.url)
    elif args.cmd == "query":
        query(" ".join(args.question))
    elif args.cmd == "index":
        _ensure_wiki()
        print(index_text())


if __name__ == "__main__":
    main()