"""
Stake.com API discovery — strategy:
1. Load the homepage (which passes CF), grab all JS bundle URLs
2. Download and parse bundles for GraphQL query strings
3. Also try the stake.com API with cf_clearance cookie
"""

import json
import time
import re
from datetime import datetime

# Module-level accumulator: run() mutates this dict in place, dumps it to
# JSON and returns it.
captured = dict(
    # JS bundle URLs (presumably; confirm where this key is populated).
    js_bundles=[],
    # One dict per extracted snippet: {"bundle", "operation", "context"}.
    gql_queries_found=[],
    # Starts as a set; run() overwrites it with a list before serialising.
    gql_operation_names=set(),
    # Cookie name -> value for cookies whose domain contains "stake".
    cookies={},
    # One dict per captured GraphQL reply: {"operation", "status", "response"}.
    api_responses=[],
)

_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
_DEFAULT_OUT = "/home/cyborg/Desktop/claude/arb_bot/tools/stake_discovery_results.json"

# Matches e.g.  operationName: "Foo"  /  "operationName":"Foo"
_OP_NAME_RE = re.compile(r'operationName["\s:]+["\'](\w+)["\']')
# Matches e.g.  query Foo(  /  mutation Foo {
_QUERY_NAME_RE = re.compile(r'(?:query|mutation)\s+(\w+)\s*[\(\{]')

# Returns a (truncated) dump of the first truthy global state object, or the
# start of the first inline script mentioning 'graphql' / '_api', or null.
_PAGE_STATE_JS = """
    () => {
        // Try various global state objects
        const candidates = [
            window.__NUXT__,
            window.__INITIAL_STATE__,
            window.__APP_STATE__,
        ];
        for (const c of candidates) {
            if (c) {
                try { return JSON.stringify(c, null, 2).slice(0, 10000); }
                catch(e) { return String(c).slice(0, 1000); }
            }
        }
        // Try to find config object
        const scripts = Array.from(document.querySelectorAll('script')).map(s => s.textContent);
        for (const s of scripts) {
            if (s.includes('graphql') || s.includes('_api')) {
                return 'FOUND_IN_SCRIPT: ' + s.slice(0, 2000);
            }
        }
        return null;
    }
"""

# Returns the src of every <script src=...> whose URL contains 'stake.com'.
_SCRIPT_SRC_JS = """
    () => Array.from(document.querySelectorAll('script[src]'))
         .map(s => s.src)
         .filter(s => s.includes('stake.com'))
"""


def _operation_name(post_data):
    """Return the ``operationName`` of a JSON request body, or None."""
    if not post_data:
        return None
    try:
        parsed = json.loads(post_data)
    except (TypeError, ValueError):
        # Body is not JSON; the operation stays unknown.
        return None
    return parsed.get("operationName") if isinstance(parsed, dict) else None


def _stake_cookies(ctx):
    """Return name -> value for context cookies whose domain contains 'stake'."""
    return {
        c["name"]: c["value"]
        for c in ctx.cookies()
        if "stake" in c.get("domain", "")
    }


def _is_stake_host(url):
    """Return True when the URL's host is stake.com or one of its subdomains."""
    from urllib.parse import urlsplit

    host = urlsplit(url).hostname or ""
    return host == "stake.com" or host.endswith(".stake.com")


def _drive_page(browser, js_urls, on_response):
    """Visit the homepage and the soccer page, filling ``captured``/``js_urls``.

    Stores cookies and (if found) ``page_state`` in ``captured`` and appends
    script-tag URLs not already present to ``js_urls``.
    """
    ctx = browser.new_context(
        viewport={"width": 1366, "height": 768},
        user_agent=_USER_AGENT,
    )
    page = ctx.new_page()
    page.on("response", on_response)

    print("[*] Loading homepage ...")
    try:
        page.goto("https://stake.com/", timeout=40000, wait_until="networkidle")
    except Exception as e:
        # Best effort: a goto timeout does not abort the run.
        print(f"  warn: {e}")

    # Wait for CF to clear (the interstitial's title contains "just a moment")
    for i in range(10):
        if "just a moment" not in page.title().lower():
            break
        print(f"  CF wait {i*4}s ...")
        time.sleep(4)

    print(f"  Page: {page.url} | {page.title()}")
    time.sleep(5)

    # Grab cookies BEFORE navigating further
    captured["cookies"] = _stake_cookies(ctx)
    print(f"  Got cookies: {list(captured['cookies'].keys())}")

    # Look for __NUXT__-style state or an inline config script
    print("\n[*] Checking page state ...")
    try:
        state = page.evaluate(_PAGE_STATE_JS)
        if state:
            print(f"  State found: {state[:500]}")
            captured["page_state"] = state
    except Exception as e:
        print(f"  warn state: {e}")

    # Get all script src URLs from the page
    print("\n[*] Collecting JS bundle URLs ...")
    try:
        for url in page.evaluate(_SCRIPT_SRC_JS):
            if url not in js_urls:
                js_urls.append(url)
        print(f"  Found {len(js_urls)} JS bundles from script tags")
    except Exception as e:
        print(f"  warn scripts: {e}")

    # Also try navigating to sports page now that we have cf_clearance
    print("\n[*] Trying sports page with cf_clearance cookie ...")
    try:
        page.goto("https://stake.com/sports/soccer", timeout=40000,
                  wait_until="domcontentloaded")
        time.sleep(3)
        for _ in range(8):
            if "just a moment" not in page.title().lower():
                print(f"  Sports page loaded: {page.url} | {page.title()}")
                break
            print("  still CF ...")
            time.sleep(5)
    except Exception as e:
        print(f"  warn sports: {e}")

    time.sleep(10)  # give network time
    captured["cookies"] = _stake_cookies(ctx)
    print(f"  Final cookies: {list(captured['cookies'].keys())}")


def _browse_and_capture(js_urls):
    """Run the Firefox session; record bundle URLs and GraphQL 200 responses."""
    # Imported lazily so the module can be imported without playwright.
    from playwright.sync_api import sync_playwright

    def on_response(resp):
        url = resp.url
        # Capture JS bundle URLs (once each: this fires for every navigation)
        if url.endswith(".js") and "stake.com" in url and "_nuxt" in url:
            if url not in js_urls:
                js_urls.append(url)
        # Capture any successful API responses
        if "_api/graphql" not in url or resp.status != 200:
            return
        try:
            body = resp.json()
            op = _operation_name(resp.request.post_data)
        except Exception as e:
            # Unreadable / non-JSON body: skip this response but say so.
            # (Not a bare except, so Ctrl-C still propagates.)
            print(f"  warn api response: {e}")
            return
        captured["api_responses"].append({
            "operation": op,
            "status": resp.status,
            "response": body,
        })
        print(f"  [API 200!] op={op}")

    with sync_playwright() as p:
        print("[*] Firefox launch")
        browser = p.firefox.launch(headless=False)
        try:
            _drive_page(browser, js_urls, on_response)
        finally:
            # Close the browser even if the page work raised.
            browser.close()


def _analyze_bundles(js_urls, max_bundles):
    """Download up to ``max_bundles`` bundles and return the op names found.

    Also appends context snippets to ``captured["gql_queries_found"]``.
    """
    import urllib.request

    cookie_header = "; ".join(f"{k}={v}" for k, v in captured["cookies"].items())
    found = set()

    for i, url in enumerate(js_urls[:max_bundles]):
        headers = {"User-Agent": _USER_AGENT, "Referer": "https://stake.com/"}
        try:
            # Only hand the session cookies to stake.com hosts; the URL filters
            # upstream are substring matches and may let other hosts through.
            if cookie_header and _is_stake_host(url):
                headers["Cookie"] = cookie_header
            req = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(req, timeout=15) as resp:
                content = resp.read().decode("utf-8", errors="replace")
        except Exception as e:
            # Many bundles will 404 or fail; report and move on.
            print(f"  Bundle {i+1}: skipped ({e})")
            continue

        bundle_name = url.split("/")[-1]
        # De-duplicate while keeping first-seen order so the 5-context budget
        # below is not spent on repeats of the same operation.
        ops = list(dict.fromkeys(_OP_NAME_RE.findall(content)))
        query_names = _QUERY_NAME_RE.findall(content)

        if ops or query_names:
            bundle_ops = set(ops) | set(query_names)
            found |= bundle_ops
            print(f"  Bundle {i+1}: {bundle_name[:30]:30} -> {len(bundle_ops)} ops: {sorted(bundle_ops)[:8]}")

            # Keep the text around the first occurrence of each name.
            for op in ops[:5]:
                idx = content.find(op)  # always >= 0: op was matched in content
                captured["gql_queries_found"].append({
                    "bundle": bundle_name,
                    "operation": op,
                    "context": content[max(0, idx - 200):idx + 500],
                })

        # Also look for the API URL
        idx = content.find("_api/graphql")
        if idx >= 0:
            print("    ** Contains graphql endpoint reference **")
            print(f"       context: {content[max(0,idx-100):idx+100]}")

    return found


def _save_results(out):
    """Write ``captured`` as JSON to ``out``; return True on success."""
    import os

    try:
        parent = os.path.dirname(out)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(out, "w", encoding="utf-8") as f:
            json.dump(captured, f, indent=2, default=str)
    except OSError as e:
        # Do not throw away a whole browser session because the path is bad;
        # the caller still gets the data back from run().
        print(f"  warn save: {e}")
        return False
    print(f"\n[*] Saved to {out}")
    return True


def run(out=_DEFAULT_OUT, max_bundles=30):
    """Discover GraphQL operation names used by stake.com.

    Opens a visible Firefox window via Playwright, loads the homepage and the
    soccer page while recording JS bundle URLs, cookies and any GraphQL
    responses with status 200, then downloads the bundles with urllib and
    scans them for operation names.

    Args:
        out: Path of the JSON file the results are written to. A failure to
            write is reported on stdout and does not raise.
        max_bundles: Maximum number of bundles to download (None for all).

    Returns:
        The module-level ``captured`` dict, mutated in place.
    """
    js_urls = []
    _browse_and_capture(js_urls)
    captured["js_bundles"] = list(js_urls)

    # ── Now download and parse JS bundles for GraphQL queries ─────────────────
    print(f"\n[*] Analyzing {len(js_urls)} JS bundles for GraphQL queries ...")
    all_op_names = _analyze_bundles(js_urls, max_bundles)

    print(f"\n[*] Total unique operation names found: {len(all_op_names)}")
    for op in sorted(all_op_names):
        print(f"  - {op}")

    # Sorted list: JSON-serialisable and stable between runs.
    captured["gql_operation_names"] = sorted(all_op_names)

    _save_results(out)
    return captured

if __name__ == "__main__":
    # Run the discovery, then print a report that highlights sports-related names.
    results = run()

    # An operation counts as sports-related when its lower-cased name contains
    # any of these substrings.
    sports_kw = ["sport", "soccer", "football", "basket", "tennis", "match",
                 "event", "fixture", "bet", "market", "odd", "league", "live",
                 "prematch", "upcoming", "odds"]

    def _is_sports(name):
        """Return True if the name contains any sports keyword."""
        return any(kw in name.lower() for kw in sports_kw)

    print("\n" + "="*70)
    print("OPERATION NAMES FOUND IN JS BUNDLES (sports-related)")
    print("="*70)
    # Every name is listed; sports-related ones get a "**" marker.
    for name in sorted(results.get("gql_operation_names", [])):
        print(f"  ** {name}" if _is_sports(name) else f"     {name}")

    print("\n" + "="*70)
    print("QUERY CONTEXTS (sports-related)")
    print("="*70)
    # Only contexts whose operation is sports-related are shown.
    for entry in results.get("gql_queries_found", []):
        name = entry.get("operation", "")
        if not _is_sports(name):
            continue
        print(f"\n  [{name}] from {entry['bundle']}")
        print(f"  {entry['context'][:400]}")
