"""``muse cat`` — print the raw content of one or more tracked files.

The domain-agnostic, file-level counterpart to ``muse code cat`` (symbol-level)
and the future ``muse midi cat`` (pattern-level).  Mirrors the relationship
between ``muse blame`` (line-level) and ``muse code blame`` (symbol-level).

Usage::

    muse cat README.md
    muse cat src/main.py --at v1.2.3
    muse cat src/main.py --staged
    muse cat src/main.py --at abc123 --json
    muse cat file1.py file2.py --json

Symbol addresses (``file.py::Symbol``) are not accepted here — use
``muse code cat`` for symbol-level reads.

JSON output schemas::

    Single file (--json):
        {
          "path":       "<path>",
          "content":         "<utf-8 text, errors replaced>",
          "size_bytes":      <int>,
          "source_ref":      "working tree" | "staged" | "commit <sha8> on <branch>",
          "duration_ms": <float>
        }

    Multiple files (--json):
        {
          "files":   [{"path": ..., "content": ..., "size_bytes": ..., "source_ref": ...}, ...],
          "errors":  [{"path": ..., "error": ..., "error_code": ..., "hint": ...}, ...],
          "source_ref":      "<str>",
          "duration_ms": <float>
        }

Exit codes::

    0 — all files read successfully
    1 — file not tracked, symlink rejected, path traversal, bad ref, bad address
    2 — not inside a Muse repository
    3 — I/O error reading from the object store or disk
"""

import argparse
import json
import pathlib
import sys

from muse.core.errors import ExitCode
from muse.core.object_store import read_object
from muse.core.repo import require_repo
from muse.core.types import Manifest
from muse.core.refs import read_current_branch
from muse.core.commits import resolve_commit_ref
from muse.core.snapshots import (
    get_commit_snapshot_manifest,
    get_head_snapshot_manifest,
)
from muse.core.envelope import EnvelopeJson, make_envelope
from muse.core.timing import start_timer
from muse.core.validation import sanitize_display
from muse.plugins.code.stage import read_stage
from typing import TypedDict

# ---------------------------------------------------------------------------
# TypedDicts — JSON output schema
# ---------------------------------------------------------------------------

class _CatFileEntryJson(TypedDict):
    """One successfully read file, as produced by ``run``.

    Entries fill the ``files`` array of the multi-file output; when exactly
    one file was requested and it was read without error, the lone entry is
    merged with the envelope to form the single-file output instead.
    """

    # File path exactly as it was passed on the command line.
    path: str
    # File bytes decoded as UTF-8 with ``errors="replace"`` (lossy for
    # non-UTF-8 input).
    content: str
    # Length of the raw bytes, measured before decoding.
    size_bytes: int
    # Where the bytes came from: "working tree", "staged", or
    # "commit <commit_id> on <branch>".
    source_ref: str

class _CatSingleJson(EnvelopeJson):
    """JSON output for ``muse cat`` with a single file.

    ``run`` emits this flat shape only when exactly one file was requested
    and it was read without error; it is the file entry's fields merged with
    whatever ``make_envelope`` contributes (inherited from ``EnvelopeJson``).
    """

    # File path exactly as it was passed on the command line.
    path: str
    # File bytes decoded as UTF-8 with ``errors="replace"``.
    content: str
    # Length of the raw bytes, measured before decoding.
    size_bytes: int
    # "working tree", "staged", or "commit <commit_id> on <branch>".
    source_ref: str

class _CatErrorEntryJson(TypedDict):
    """One per-file failure in the ``errors`` array of the multi-file output.

    ``run`` builds one entry from each ``_FileError`` it catches.
    """

    # File path exactly as it was passed on the command line.
    path: str
    # Human-readable message, passed through ``sanitize_display``.
    error: str
    # Machine-parseable code taken from ``_FileError.code``
    # (e.g. FILE_NOT_TRACKED, SYMLINK_REJECTED, PATH_TRAVERSAL, BLOB_NOT_FOUND).
    error_code: str
    # Suggested remedy from ``_FileError.hint``; may be the empty string.
    hint: str

class _CatMultiJson(EnvelopeJson):
    """JSON output for ``muse cat`` with multiple files or errors.

    ``run`` emits this shape whenever more than one file was requested or
    at least one requested file failed — including a single requested file
    that failed, in which case ``files`` is empty.
    """

    # Entries for every file that was read successfully, in request order.
    files: list[_CatFileEntryJson]
    # Entries for every file that could not be read, in request order.
    errors: list[_CatErrorEntryJson]
    # Source description shared by all files in the request:
    # "working tree", "staged", or "commit <commit_id> on <branch>".
    source_ref: str

# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

class _FileError(Exception):
    """A per-file read failure that carries a machine-parseable code.

    Attributes:
        code: Short upper-case identifier for the failure kind, suitable for
            the ``error_code`` field of JSON output.
        hint: Optional suggested remedy for the user; empty when there is
            nothing useful to suggest.
    """

    def __init__(self, message: str, code: str, hint: str = "") -> None:
        # Record the structured fields first, then let Exception store the
        # human-readable message as its sole positional argument.
        self.code, self.hint = code, hint
        super().__init__(message)

def _get_file_bytes(
    root: pathlib.Path,
    file_path: str,
    manifest: Manifest,
    source_is_workdir: bool,
) -> bytes:
    """Return raw bytes for *file_path* from disk or the object store.

    Working-tree reads (``source_is_workdir=True``)
        A file is "tracked" if it appears in *manifest* OR in the stage index
        with a mode other than ``"D"`` (staged-but-not-committed files).
        Files that exist only on disk without being tracked are rejected with
        FILE_NOT_TRACKED — this prevents silent reads of arbitrary workspace
        files that Muse knows nothing about.  If a tracked file is missing
        from disk, the staged blob is tried first, then the manifest blob.

    Snapshot reads (``source_is_workdir=False``)
        The file must be a key of *manifest*; its blob is read from the
        object store and the disk is never touched.

    Args:
        root: Repository root directory.
        file_path: Path of the file, relative to *root*.
        manifest: Mapping of tracked path to object id for the chosen source.
        source_is_workdir: Read from disk (with object-store fallback) rather
            than purely from the object store.

    Returns:
        The file's raw bytes.

    Raises:
        _FileError: with ``code`` set to one of SYMLINK_REJECTED,
            PATH_TRAVERSAL, FILE_NOT_TRACKED, READ_ERROR or BLOB_NOT_FOUND.

    Security
    --------
    Workdir reads reject symlinks and enforce path containment to prevent
    directory traversal attacks.
    """
    if source_is_workdir:
        disk = root / file_path
        if disk.is_symlink():
            raise _FileError(
                f"refusing to read symlink: {file_path}",
                code="SYMLINK_REJECTED",
                hint="dereference the symlink and commit the real file instead",
            )
        try:
            # resolve() follows intermediate symlinks and collapses "..", so a
            # path that lands outside the root fails relative_to().
            disk.resolve().relative_to(root.resolve())
        except ValueError:
            raise _FileError(
                f"path escapes repository root: {file_path}",
                code="PATH_TRAVERSAL",
                hint="file paths must be relative to the repository root",
            ) from None

        stage = read_stage(root)
        in_manifest = file_path in manifest
        stage_entry = stage.get(file_path)
        in_stage = stage_entry is not None and stage_entry["mode"] != "D"

        if not in_manifest and not in_stage:
            raise _FileError(
                f"file not tracked: {file_path}",
                code="FILE_NOT_TRACKED",
                hint="use 'muse code add <file>' to track it",
            )

        try:
            return disk.read_bytes()
        except (FileNotFoundError, NotADirectoryError):
            pass  # deleted from disk — fall through to object store
        except OSError as exc:
            # The path exists but could not be read (permissions, I/O fault,
            # ...).  Falling back to a stored blob here would silently present
            # committed/staged content as the working-tree content.
            raise _FileError(
                f"cannot read {file_path}: {exc.strerror or exc}",
                code="READ_ERROR",
                hint="check the file's type and permissions in the working tree",
            ) from exc

        # File deleted from disk: try stage blob first, then manifest blob.
        # (`stage_entry is not None` is implied by in_stage; it is repeated
        # so type checkers can narrow the optional.)
        if in_stage and stage_entry is not None:
            raw = read_object(root, stage_entry["object_id"])
            if raw is not None:
                return raw
        if in_manifest:
            raw = read_object(root, manifest[file_path])
            if raw is not None:
                return raw
        raise _FileError(
            f"blob not found in object store for: {file_path}",
            code="BLOB_NOT_FOUND",
            hint="the object store may be corrupted; try `muse gc` to diagnose",
        )

    if file_path not in manifest:
        raise _FileError(
            f"file not tracked: {file_path}",
            code="FILE_NOT_TRACKED",
            hint="use 'muse code add <file>' to track it",
        )

    raw = read_object(root, manifest[file_path])
    if raw is None:
        raise _FileError(
            f"blob not found in object store: {manifest[file_path]}",
            code="BLOB_NOT_FOUND",
            hint="the object store may be corrupted; try `muse gc` to diagnose",
        )
    return raw

# ---------------------------------------------------------------------------
# Registration
# ---------------------------------------------------------------------------

def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``cat`` subcommand, with all of its options, to *subparsers*.

    The module docstring doubles as the subcommand's long description and is
    rendered verbatim (raw formatter), and ``run`` is installed as the
    handler under the ``func`` default.
    """
    files_help = (
        "One or more tracked file paths. Symbol addresses (file.py::Symbol) "
        "are not accepted — use 'muse code cat' for symbol-level reads."
    )
    at_help = (
        "Commit ref (SHA prefix, branch, tag) to read from. Defaults to the "
        "working tree (uncommitted edits visible). Mutually exclusive with "
        "--staged."
    )
    staged_help = (
        "Read the staged version of each file — the content that would be committed "
        "if you ran 'muse commit' now. Ignores working-tree edits made after the "
        "last 'muse code add'. Mirrors 'git show :path'."
    )

    cat_parser = subparsers.add_parser(
        "cat",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Print the raw content of one or more tracked files.",
    )

    # Positional: zero-or-more so that `run` can report a missing file itself.
    cat_parser.add_argument("files", metavar="file", nargs="*", help=files_help)

    # Source selectors.
    cat_parser.add_argument("--at", metavar="REF", default=None, help=at_help)
    cat_parser.add_argument(
        "--staged",
        default=False,
        action="store_true",
        help=staged_help,
    )

    # Output format.
    cat_parser.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit machine-readable JSON.",
    )

    cat_parser.set_defaults(func=run)

# ---------------------------------------------------------------------------
# Handler
# ---------------------------------------------------------------------------

def _report_usage_error(msg: str, json_out: bool, error_code: str) -> None:
    """Emit a fatal, request-level error in the active output format.

    With *json_out* the error is a one-line JSON object on stdout carrying
    ``error`` and ``error_code``; otherwise it is a ``❌``-prefixed line on
    stderr.  The caller decides the exit status.
    """
    if json_out:
        print(json.dumps({"error": msg, "error_code": error_code}))
    else:
        print(f"❌ {msg}", file=sys.stderr)


def run(args: argparse.Namespace) -> None:
    """Print the raw content of one or more tracked files.

    Content is read from the working tree by default, from HEAD overlaid
    with the stage index when ``--staged`` is given, or from a commit
    snapshot when ``--at REF`` is given.  ``--staged`` and ``--at`` are
    mutually exclusive.

    Text mode writes each file's content to stdout (preceded by a
    ``# <path>  (<source_ref>)`` header when several files were requested)
    and per-file errors to stderr.  Content is decoded as UTF-8 with
    replacement in both modes, so non-UTF-8 files are not reproduced
    byte-for-byte.

    Agent quickstart
    ----------------
    ::

        muse cat src/billing.py --json
        muse cat src/billing.py --at HEAD~5 --json
        muse cat src/billing.py src/auth.py --json

    JSON fields (single file, read successfully)
    --------------------------------------------
    path        File path as given on the command line.
    content     File content decoded as UTF-8 (invalid bytes replaced).
    size_bytes  Size of the raw content in bytes.
    source_ref  ``"working tree"``, ``"staged"`` or
                ``"commit <commit_id> on <branch>"``.

    Plus the envelope fields supplied by ``make_envelope``.

    JSON fields (multiple files, or any per-file error)
    ---------------------------------------------------
    files      List of file entry objects (same fields as single-file mode).
    errors     List of objects with ``path``, ``error``, ``error_code`` and
               ``hint`` for files that could not be read.
    source_ref Same source description for all files in the request.

    Request-level failures (conflicting flags, no file, symbol address,
    invalid path, unknown ref) print ``{"error": ..., "error_code": ...}``
    instead and exit immediately.

    Exit codes
    ----------
    0                     All files found and printed.
    ExitCode.USER_ERROR   Bad arguments, unknown ref, or any file that could
                          not be read.

    Running outside a repository is handled by ``require_repo`` (presumably
    exiting with its own status — see the module docstring).

    Raises:
        SystemExit: always; the exit status is carried by the exception.
    """
    elapsed = start_timer()
    files: list[str] = args.files
    at: str | None = args.at
    staged: bool = getattr(args, "staged", False)
    json_out: bool = args.json_out

    if staged and at is not None:
        _report_usage_error(
            "--staged and --at are mutually exclusive",
            json_out,
            "MUTUALLY_EXCLUSIVE",
        )
        raise SystemExit(ExitCode.USER_ERROR)

    if not files:
        _report_usage_error(
            "no file given — usage: muse cat <file> [<file> ...] [--at <ref>]",
            json_out,
            "NO_FILE_GIVEN",
        )
        raise SystemExit(ExitCode.USER_ERROR)

    # Reject symbol addresses up front.
    for f in files:
        if "::" in f:
            _report_usage_error(
                f"'{sanitize_display(f)}' looks like a symbol address. "
                f"Use 'muse code cat' for symbol-level reads.",
                json_out,
                "SYMBOL_ADDRESS_REJECTED",
            )
            raise SystemExit(ExitCode.USER_ERROR)

    # Reject paths with control characters (newlines, null bytes, ANSI).
    for f in files:
        if any(ch in f for ch in ("\n", "\r", "\x00")):
            _report_usage_error(
                f"invalid path: control character in '{sanitize_display(f)}'",
                json_out,
                "INVALID_PATH",
            )
            raise SystemExit(ExitCode.USER_ERROR)
        if "\x1b" in f:
            _report_usage_error(
                f"invalid path: ANSI escape in '{sanitize_display(f)}'",
                json_out,
                "INVALID_PATH",
            )
            raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    branch = read_current_branch(root)

    source_is_workdir = at is None and not staged
    manifest: Manifest

    if staged:
        # Staged view = HEAD manifest with the stage index applied on top:
        # "D" entries remove a path, everything else overrides its object id.
        head_manifest = get_head_snapshot_manifest(root, branch) or {}
        stage = read_stage(root)
        staged_manifest: dict[str, str] = dict(head_manifest)
        for _path, _entry in stage.items():
            if _path.startswith(".muse/"):
                continue
            if _entry["mode"] == "D":
                staged_manifest.pop(_path, None)
            else:
                staged_manifest[_path] = _entry["object_id"]
        manifest = staged_manifest
        source_ref = "staged"
    elif source_is_workdir:
        # The HEAD manifest is only used to decide what counts as tracked;
        # the bytes themselves come from disk.
        manifest = get_head_snapshot_manifest(root, branch) or {}
        source_ref = "working tree"
    else:
        resolved = resolve_commit_ref(root, branch, at)
        if resolved is None:
            _report_usage_error(
                f"ref not found: {sanitize_display(at or '')}",
                json_out,
                "REF_NOT_FOUND",
            )
            raise SystemExit(ExitCode.USER_ERROR)
        manifest = get_commit_snapshot_manifest(root, resolved.commit_id) or {}
        source_ref = f"commit {resolved.commit_id} on {branch}"

    results: list[_CatFileEntryJson] = []
    errors: list[_CatErrorEntryJson] = []

    # A failing file does not abort the request: it is recorded and the
    # remaining files are still read.
    for file_path in files:
        try:
            raw = _get_file_bytes(root, file_path, manifest, source_is_workdir)
        except _FileError as exc:
            errors.append(_CatErrorEntryJson(
                path=file_path,
                error=sanitize_display(str(exc)),
                error_code=exc.code,
                hint=exc.hint,
            ))
            if not json_out:
                print(f"❌ {sanitize_display(str(exc))}", file=sys.stderr)
            continue
        content = raw.decode("utf-8", errors="replace")
        results.append(_CatFileEntryJson(
            path=file_path,
            content=content,
            size_bytes=len(raw),
            source_ref=source_ref,
        ))

    has_errors = bool(errors)

    if json_out:
        if len(files) == 1 and not has_errors:
            # Exactly one file, read successfully: flat single-file shape.
            print(json.dumps(_CatSingleJson(**make_envelope(elapsed), **results[0])))
        else:
            print(json.dumps(_CatMultiJson(
                **make_envelope(elapsed),
                files=results,
                errors=errors,
                source_ref=source_ref,
            )))
    else:
        several = len(files) > 1
        for entry in results:
            if several:
                print(f"# {entry['path']}  ({source_ref})")
            # Written without a trailing newline so single-file output is
            # exactly the decoded content (invalid UTF-8 appears as U+FFFD).
            sys.stdout.write(entry["content"])
            if several:
                print()

    raise SystemExit(0 if not has_errors else ExitCode.USER_ERROR)