#!/usr/bin/env python3
"""
DeadCatFound Leak Scanner (Python)
Finds exposed secrets in your code and files, flags sensitive files that are
tracked by git, and scans a live website's HTML and JS.

USAGE
  python leak_scan.py [path]              scan a directory (default: current)
  python leak_scan.py --url https://...   scan a live website's HTML + JS

Educational security tooling — review every finding yourself. A clean scan is
not a guarantee; a hit is something to fix now.
"""
import os, re, sys, stat, subprocess
from urllib.request import urlopen, Request

# Regular expressions for well-known credential formats, keyed by the label
# that is printed next to each finding.
PATTERNS = {
    "Anthropic API key":  r"sk-ant-[A-Za-z0-9_\-]{20,}",
    "OpenAI API key":     r"sk-(?:proj-)?[A-Za-z0-9]{20,}",
    "AWS access key id":  r"AKIA[0-9A-Z]{16}",
    "Google API key":     r"AIza[0-9A-Za-z_\-]{35}",
    "GitHub token":       r"gh[pousr]_[A-Za-z0-9]{36,}",
    "Stripe live key":    r"[rsp]k_live_[0-9A-Za-z]{20,}",
    "Slack token":        r"xox[baprs]-[0-9A-Za-z\-]{10,}",
    # Armor headers: PGP keys end in "PRIVATE KEY BLOCK", and DSA / ENCRYPTED
    # (PKCS#8) keys carry their own prefix, so all of those are accepted.
    "Private key block":  r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED |PGP )?"
                          r"PRIVATE KEY(?: BLOCK)?-----",
    "JSON Web Token":     r"eyJ[A-Za-z0-9_\-]{10,}\.eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{6,}",
    # Generic `name = "value"` assignments with a quoted value of 8+ characters.
    "Hardcoded secret":   r"(?i)(?:api[_\-]?key|secret|token|passwd|password)\s*[:=]\s*"
                          r"['\"][^'\"\n]{8,}['\"]",
}
# Directory names that are never descended into during a directory scan.
SKIP_DIRS = {".git", "node_modules", "venv", ".venv", "__pycache__",
             "dist", "build", ".wrangler", ".next"}
# File names that suggest the file itself holds credentials.
SENSITIVE = re.compile(r"(\.env|credential|secret|\.pem$|\.key$|id_rsa)", re.I)
# Accumulated results: (kind, location, detail) tuples appended by the scanners.
findings = []

def redact(s):
    """Return *s* stripped of surrounding whitespace and shortened for display.

    Values of 12 characters or fewer come back whole; longer values keep only
    their first six and last four characters, joined by an ellipsis.
    """
    trimmed = s.strip()
    if len(trimmed) > 12:
        return f"{trimmed[:6]}…{trimmed[-4:]}"
    return trimmed

def scan_text(path, text):
    """Record a LEAK finding for every PATTERNS match found in *text*.

    *text* is examined one line at a time so each finding can carry a
    1-based line number. Every match appends a
    ``("LEAK", "<path>:<line>", "<pattern name> → <redacted match>")`` tuple to
    the module-level ``findings`` list.

    Lines are scanned regardless of length: minified JavaScript bundles are
    commonly a single very long line, and skipping those would leave exactly
    the content a website scan is meant to inspect unexamined.

    path: label used in the finding's location (a file path or URL).
    text: the already-decoded content to scan.
    """
    for i, line in enumerate(text.splitlines(), 1):
        for name, pat in PATTERNS.items():
            for m in re.finditer(pat, line):
                # Only a redacted form of the match is stored, never the raw secret.
                findings.append(("LEAK", f"{path}:{i}", f"{name} → {redact(m.group(0))}"))

def scan_dir(root):
    """Walk *root*, scanning file contents and checking sensitive-file modes.

    Directories named in SKIP_DIRS are pruned from the walk. Each regular file
    of at most 2,000,000 bytes is read as text and handed to scan_text().
    Independently of whether its content was scanned, a file whose name matches
    SENSITIVE gets a PERM finding when its mode grants read access to group or
    others.

    Files that cannot be stat'ed or read are skipped silently (best effort);
    non-regular files (FIFOs, sockets, devices) are never opened.
    """
    import shlex  # local import: only needed to build the chmod hint below

    for dp, dn, fn in os.walk(root):
        # Prune in place so os.walk does not descend into skipped directories.
        dn[:] = [d for d in dn if d not in SKIP_DIRS]
        for f in fn:
            p = os.path.join(dp, f)
            try:
                st = os.stat(p)
            except OSError:
                continue  # vanished file, broken symlink, or no access
            if not stat.S_ISREG(st.st_mode):
                continue  # opening a FIFO or device could block indefinitely
            if st.st_size <= 2_000_000:
                try:
                    with open(p, "r", encoding="utf-8", errors="ignore") as fh:
                        scan_text(p, fh.read())
                except (OSError, UnicodeError):
                    # Unreadable content must not suppress the mode check below.
                    pass
            if SENSITIVE.search(f):
                mode = stat.S_IMODE(st.st_mode)
                if mode & (stat.S_IRGRP | stat.S_IROTH):
                    # The path comes from the scanned tree, so quote it before
                    # offering it as a copy-pasteable shell command.
                    findings.append(("PERM", p,
                        f"sensitive file is group/other-readable (mode {oct(mode)}) "
                        f"— run: chmod 600 {shlex.quote(p)}"))

def scan_git(root):
    """Record a GIT finding for each tracked file whose name matches SENSITIVE.

    Runs ``git ls-files -z`` inside *root* and tests the base name of every
    listed path. The ``-z`` form is used because the default output wraps
    unusual paths in double quotes with escapes, and the trailing quote would
    defeat the ``$``-anchored alternatives in SENSITIVE.

    Best effort: if git cannot be started, times out, or exits with an error
    (for example because *root* is not a repository), nothing is recorded.
    """
    try:
        out = subprocess.run(["git", "-C", root, "ls-files", "-z"],
                             capture_output=True, timeout=20)
    except (OSError, subprocess.SubprocessError):
        return
    if out.returncode != 0:
        return
    for raw in out.stdout.split(b"\0"):
        if not raw:
            continue  # output ends with a NUL, leaving one empty chunk
        # "replace" keeps the name printable even if it is not valid UTF-8.
        f = raw.decode("utf-8", "replace")
        if SENSITIVE.search(os.path.basename(f)):
            findings.append(("GIT", f,
                "sensitive file is TRACKED by git — remove it and add to .gitignore"))

def scan_url(url):
    """Fetch *url* and scan its HTML plus every script it references via ``src``.

    The page body is passed to scan_text() under the label *url*. Each
    ``src="...js..."`` attribute found in the HTML is resolved against the
    page's final URL (after redirects), fetched, and scanned under its own
    resolved URL. Only http and https script URLs are followed.

    Fetch failures are reported on stdout and do not stop the scan; if the
    page itself cannot be fetched the function returns without scanning.
    """
    # Local import: the file-level imports only pull in urllib.request.
    from urllib.parse import urljoin, urlsplit

    hdr = {"User-Agent": "DeadCatFound-LeakScanner"}

    def fetch(target):
        # The context manager closes the connection even if read() fails.
        with urlopen(Request(target, headers=hdr), timeout=20) as resp:
            return resp.geturl(), resp.read().decode("utf-8", "ignore")

    try:
        base, html = fetch(url)
    except Exception as e:
        print(f"  could not fetch {url}: {e}")
        return
    scan_text(url, html)

    scripts = set(re.findall(r'src=["\']([^"\']+\.js[^"\']*)["\']', html))
    for js in sorted(scripts):
        # urljoin handles relative, root-relative ("/a.js") and
        # protocol-relative ("//cdn/a.js") references correctly.
        full = urljoin(base, js)
        if urlsplit(full).scheme not in ("http", "https"):
            continue  # never let a page steer us to file: or other schemes
        try:
            _, body = fetch(full)
        except Exception as e:
            print(f"  could not fetch {full}: {e}")
            continue
        scan_text(full, body)

def main():
    """Command-line entry point: run the requested scan and print a report.

    Arguments are read from ``sys.argv``: ``--url <URL>`` scans a website,
    otherwise the first argument (default ``.``) is scanned as a directory
    and checked for sensitive git-tracked files. ``-h``/``--help`` prints the
    module docstring.

    Exit status: 0 when nothing was found, 1 when there is at least one
    finding, 2 on a usage error (``--url`` without a URL, or a path that is
    not a directory). Usage errors are rejected up front so that a mistyped
    invocation can never be reported as a clean scan.
    """
    args = sys.argv[1:]
    if args and args[0] in ("-h", "--help"):
        print(__doc__ or "")
        sys.exit(0)

    url = root = None
    if args and args[0] == "--url":
        if len(args) < 2:
            print("error: --url requires a URL argument", file=sys.stderr)
            sys.exit(2)
        url = args[1]
    else:
        root = args[0] if args else "."
        if not os.path.isdir(root):
            print(f"error: not a directory: {root}", file=sys.stderr)
            sys.exit(2)

    print("=" * 60 + "\n  DEADCATFOUND LEAK SCANNER\n" + "=" * 60)
    if url is not None:
        print(f"Scanning website: {url}")
        scan_url(url)
    else:
        print(f"Scanning directory: {os.path.abspath(root)}")
        scan_dir(root)
        scan_git(root)

    if not findings:
        print("\n  CLEAN — no exposed secrets detected.\n"
              "  (Not a guarantee. Re-scan after every change.)")
    else:
        print(f"\n  {len(findings)} ISSUE(S) FOUND — fix these now:\n")
        for kind, where, detail in findings:
            print(f"  [{kind}] {where}\n        {detail}")
    print("=" * 60)
    sys.exit(1 if findings else 0)

# Run the scanner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
