#!/usr/bin/env python3
"""
Bot Validation Lab — brutally honest backtest validator.
DeadCatFound · educational use only · not financial advice.

Feed it a CSV of your bot's per-trade returns and it tells you whether the
edge is real or whether you've fooled yourself. Three tests:

  1. SIGNIFICANCE   — is the mean trade return distinguishable from zero?
  2. IN/OUT-OF-SAMPLE — does the edge survive on the second half of trades?
  3. MONTE CARLO    — resample the trades with replacement 10,000 times:
                      what is the realistic range of total return and max
                      drawdown?

USAGE
  python validator.py trades.csv [return_column]

The CSV needs one column of per-trade returns as decimals (0.012 = +1.2%)
or percentages (1.2). The script auto-detects. Default column: 'return'.

Requires: pandas, numpy   ( pip install pandas numpy )
"""
import sys
import math
import numpy as np
import pandas as pd


def load_returns(path, col):
    """Load per-trade returns from a CSV file as an array of decimals.

    Args:
        path: Location of the CSV file, passed straight to ``pandas.read_csv``.
        col: Name of the column holding the returns. If the file has no such
            column, the first numeric column is used instead and its name is
            printed.

    Returns:
        numpy.ndarray of the finite values found in the column. When the
        median absolute value exceeds 1.5 the values are treated as
        percentages and divided by 100.

    Exits via ``sys.exit`` when there is no numeric column to fall back on or
    when no usable value remains after cleaning.
    """
    df = pd.read_csv(path)
    if col not in df.columns:
        # fall back to the first numeric column
        numeric = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        if not numeric:
            sys.exit(f"No numeric column found. Columns: {list(df.columns)}")
        col = numeric[0]
        print(f"(column '{col}' used)")
    r = pd.to_numeric(df[col], errors="coerce").dropna().to_numpy()
    # dropna() removes NaN but keeps +/-inf; a single infinite value would
    # turn the mean, the standard deviation and every compounded equity curve
    # into inf/NaN, so discard non-finite entries before anything else.
    r = r[np.isfinite(r)]
    if len(r) == 0:
        sys.exit("No usable return values.")
    # auto-detect percent vs decimal: if the typical magnitude is > 1.5, assume %
    if np.median(np.abs(r)) > 1.5:
        print("(values look like percentages — converting to decimals)")
        r = r / 100.0
    return r


def max_drawdown(equity):
    """Return the deepest decline of an equity curve from its running peak.

    The value is expressed as a fraction of the peak at that point. For a
    positive equity curve it is 0.0 when no point falls below an earlier
    high and negative otherwise (-0.25 means a 25% drawdown).
    """
    running_peak = np.maximum.accumulate(equity)
    decline = (equity - running_peak) / running_peak
    return float(decline.min())


def significance(r):
    """Test whether the mean per-trade return is distinguishable from zero.

    Args:
        r: 1-D numpy array of per-trade returns.

    Returns:
        Tuple ``(n, mean, sd, t, p)``: the number of trades, the mean return,
        the sample standard deviation (ddof=1, or 0.0 for a single trade),
        the t-statistic of the mean against zero, and the two-sided p-value.
        When the standard deviation is zero the t-statistic is reported as
        0.0 and the p-value as 1.0.

    The p-value uses the normal approximation (no scipy dependency), so for
    small samples it is somewhat smaller than a Student-t p-value would be.
    """
    n = len(r)
    mean = r.mean()
    sd = r.std(ddof=1) if n > 1 else 0.0
    # t-statistic for mean return != 0
    t = mean / (sd / np.sqrt(n)) if sd > 0 else 0.0
    # Two-sided normal p-value: 2 * (1 - Phi(|t|)) == erfc(|t| / sqrt(2)).
    # erfc is used directly because 1 - erf(x) cancels to exactly 0.0 once
    # |t| exceeds roughly 8, which would report strong edges as p = 0.
    p = math.erfc(abs(t) / math.sqrt(2))
    return n, mean, sd, t, p


def in_out_sample(r):
    """Compare the first half of the trades with the second half.

    Returns ``None`` when the first half would hold fewer than 5 trades
    (i.e. fewer than 10 trades overall). Otherwise returns the tuple
    ``(first_mean, second_mean, first_win_rate, second_win_rate)``, where a
    win is a strictly positive return. With an odd number of trades the
    second half gets the extra one.
    """
    split = len(r) // 2
    if split >= 5:
        first, second = r[:split], r[split:]
        return (
            np.mean(first),
            np.mean(second),
            np.mean(first > 0),
            np.mean(second > 0),
        )
    return None


def monte_carlo(r, runs=10000, seed=None):
    """Bootstrap the trade sequence to estimate outcome ranges.

    Each run draws ``len(r)`` trades from ``r`` with replacement, compounds
    them into an equity curve starting at 1.0, and records the total return
    and the maximum drawdown of that curve.

    Args:
        r: 1-D array of per-trade returns as decimals; must not be empty.
        runs: Number of resampled sequences to simulate.
        seed: Optional seed for a dedicated ``numpy.random.Generator`` so a
            report can be reproduced. When ``None`` the global ``np.random``
            state is used, exactly as before this parameter existed.

    Returns:
        Tuple ``(totals, drawdowns)`` of two arrays of length ``runs``.
    """
    n = len(r)
    # Keep the legacy global RNG for unseeded calls so code that relies on
    # np.random.seed() behaves the same; only build a Generator on request.
    if seed is None:
        draw = np.random.choice
    else:
        draw = np.random.default_rng(seed).choice
    totals, drawdowns = np.empty(runs), np.empty(runs)
    for i in range(runs):
        sample = draw(r, size=n, replace=True)
        equity = np.cumprod(1 + sample)
        totals[i] = equity[-1] - 1
        # Prepend the starting capital so a loss on the very first trade
        # counts as a drawdown from 1.0.
        drawdowns[i] = max_drawdown(np.concatenate([[1.0], equity]))
    return totals, drawdowns


def pctl(a, q):
    """Return the q-th percentile (q on a 0-100 scale) of ``a`` as a float."""
    value = np.percentile(a, q)
    return float(value)


def _report_significance(r):
    """Print section [1]: summary statistics and the significance verdict."""
    n, mean, sd, t, p = significance(r)
    equity = np.cumprod(1 + r)
    print(f"\n[1] SIGNIFICANCE  ({n} trades)")
    print(f"    Mean trade return : {mean*100:+.3f}%")
    print(f"    Std dev           : {sd*100:.3f}%")
    print(f"    Total return      : {(equity[-1]-1)*100:+.1f}%")
    print(f"    Win rate          : {(r>0).mean()*100:.1f}%")
    print(f"    t-stat / p-value  : {t:.2f} / {p:.4f}")
    # Only a positive mean counts as an edge; a significantly negative mean
    # is reported as "not significant" on purpose.
    if p < 0.05 and mean > 0:
        print("    VERDICT: edge is statistically significant.")
    else:
        print("    VERDICT: NOT significant — this could be luck. Be skeptical.")


def _report_in_out_sample(r):
    """Print section [2]: first-half vs second-half comparison and verdict."""
    ios = in_out_sample(r)
    print("\n[2] IN-SAMPLE vs OUT-OF-SAMPLE")
    if ios is None:
        print("    Too few trades to split (need >= 10).")
        return
    a_m, b_m, a_w, b_w = ios
    print(f"    First half  : mean {a_m*100:+.3f}%  win {a_w*100:.0f}%")
    print(f"    Second half : mean {b_m*100:+.3f}%  win {b_w*100:.0f}%")
    if b_m <= 0:
        print("    VERDICT: edge COLLAPSES out-of-sample — likely overfit.")
    elif b_m < a_m * 0.5:
        print("    VERDICT: edge decays badly out-of-sample — fragile.")
    else:
        print("    VERDICT: edge holds out-of-sample — encouraging.")


def _report_monte_carlo(r):
    """Print section [3]: bootstrap ranges for total return and drawdown."""
    totals, drawdowns = monte_carlo(r)
    # The simulation samples trades with replacement (a bootstrap); a pure
    # reshuffle could not change the total return. The header says so and
    # takes the run count from the result instead of hard-coding it.
    print(f"\n[3] MONTE CARLO  ({len(totals):,} resamples of the trades, "
          f"with replacement)")
    print(f"    Total return  5th–95th pct : "
          f"{pctl(totals,5)*100:+.1f}%  to  {pctl(totals,95)*100:+.1f}%")
    print(f"    Median total return        : {pctl(totals,50)*100:+.1f}%")
    print(f"    Probability of a loss      : {(totals<0).mean()*100:.1f}%")
    print(f"    Max drawdown  median       : {pctl(drawdowns,50)*100:.1f}%")
    # Drawdowns are negative, so the 5th percentile is the worst 5% tail.
    print(f"    Max drawdown  worst 5%     : {pctl(drawdowns,5)*100:.1f}%")
    print("    -> Size your account so the WORST-5% drawdown is survivable.")


def main():
    """Command-line entry point: load the trades and print the full report.

    Reads the CSV path from ``sys.argv[1]`` and an optional return-column
    name from ``sys.argv[2]`` (default ``'return'``). With no arguments the
    module docstring is emitted as usage text via ``sys.exit``.
    """
    if len(sys.argv) < 2:
        sys.exit(__doc__)
    path = sys.argv[1]
    col = sys.argv[2] if len(sys.argv) > 2 else "return"
    r = load_returns(path, col)

    print("\n" + "=" * 60)
    print("  BOT VALIDATION LAB — HONEST REPORT")
    print("=" * 60)

    _report_significance(r)
    _report_in_out_sample(r)
    _report_monte_carlo(r)

    print("\n" + "=" * 60)
    print("  If the bot fails test 1 or 2, do not trade it with real money.")
    print("=" * 60 + "\n")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()