#!/usr/bin/env python3
import argparse
import datetime as dt
import json
import os
import pathlib
import subprocess
import sys

# Directory two levels above this file's resolved path (the parent of the
# directory that contains this script).
ROOT = pathlib.Path(__file__).resolve().parents[1]
# Base directory under which main() creates one subdirectory per run.
RUNS = ROOT / 'runs'
# Path of the search executable that main() invokes with `--json`.
SEARCH = ROOT / 'bin' / 'searx-search'


def slugify(text: str) -> str:
    """Turn free text into a short, hyphen-separated slug.

    The text is lower-cased; alphanumeric characters are kept, runs of
    spaces, hyphens and underscores become a single ``-``, and every other
    character is dropped. The result is capped at 80 characters and never
    starts or ends with a hyphen.

    Args:
        text: Arbitrary input, e.g. a search query.

    Returns:
        The slug, or ``'query'`` when nothing usable remains.
    """
    separators = {' ', '-', '_'}
    pieces: list[str] = []
    for ch in text.lower():
        if ch.isalnum():
            pieces.append(ch)
        elif ch in separators and pieces and pieces[-1] != '-':
            # Collapse separator runs and skip leading separators as we go.
            pieces.append('-')
    # Truncate first, then strip: cutting at 80 characters can land right
    # after a separator, which would otherwise leave a trailing hyphen.
    slug = ''.join(pieces)[:80].strip('-')
    return slug or 'query'


def run_cmd(args: list[str]) -> str:
    """Run a command and return its standard output as text.

    Args:
        args: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell is involved).

    Returns:
        Everything the command wrote to stdout.

    Raises:
        SystemExit: If the command cannot be started or exits with a
            non-zero status. For a non-zero exit the message is the
            command's stripped stderr when there is any, otherwise a
            generic description including ``args``.
    """
    try:
        proc = subprocess.run(args, capture_output=True, text=True)
    except OSError as exc:
        # A missing or non-executable program should end the script with a
        # readable message, just like a non-zero exit, not a traceback.
        raise SystemExit(f'command failed to start: {args}: {exc}') from exc
    if proc.returncode != 0:
        raise SystemExit(proc.stderr.strip() or f'command failed: {args}')
    return proc.stdout


def _format_engine(entry: object) -> str:
    """Render one ``unresponsiveEngines`` entry as text for the report."""
    if isinstance(entry, str):
        return entry
    if isinstance(entry, (list, tuple)):
        # NOTE(review): upstream SearXNG reports [engine, reason] pairs; the
        # exact shape emitted by the search wrapper is not visible here, so
        # both plain names and pairs are accepted — confirm.
        return ': '.join(str(part) for part in entry)
    return str(entry)


def _render_report(payload: dict) -> str:
    """Build the Markdown report text for a run from its payload dict."""
    lines = [
        f'# Research Run: {payload["query"]}',
        '',
        f'- Run ID: `{payload["runId"]}`',
        f'- Created: {payload["createdAt"]}',
        f'- Category: `{payload["category"]}`',
        f'- Language: `{payload["language"]}`',
        f'- Result count: {payload["resultCount"]}',
    ]
    if payload['note']:
        lines.append(f'- Note: {payload["note"]}')
    if payload['unresponsiveEngines']:
        engines = ', '.join(_format_engine(e) for e in payload['unresponsiveEngines'])
        lines.append(f'- Unresponsive engines: {engines}')
    lines.extend(['', '## Results', ''])

    if not payload['results']:
        lines.append('_No results returned._')
        return '\n'.join(lines)

    for i, item in enumerate(payload['results'], 1):
        lines.append(f'{i}. [{item.get("title") or "(untitled)"}]({item.get("url") or ""})')
        # Optional metadata, shown on one sub-bullet in a fixed order.
        meta = [
            f'{label}={item[key]}'
            for label, key in (
                ('engine', 'engine'),
                ('category', 'category'),
                ('published', 'publishedDate'),
            )
            if item.get(key)
        ]
        if meta:
            lines.append('   - ' + ' | '.join(meta))
        if item.get('content'):
            # Keep snippets short; only the first 400 characters are shown.
            lines.append(f'   - {item["content"][:400]}')
        lines.append('')
    return '\n'.join(lines)


def main() -> int:
    """Run a search and checkpoint its results to a new run directory.

    Parses command-line arguments, invokes the ``SEARCH`` executable with
    ``--json``, and writes ``search.json`` (the payload) and ``report.md``
    (a Markdown summary) into ``RUNS/<timestamp>-<slug>``. The run
    directory path is printed to stdout.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the search command fails (raised by ``run_cmd``) or
            its output is not a JSON object.
    """
    parser = argparse.ArgumentParser(description='Run local research search and checkpoint results to disk.')
    parser.add_argument('query', nargs='+')
    parser.add_argument('-c', '--category', default='general')
    parser.add_argument('-n', '--limit', type=int, default=8)
    parser.add_argument('-l', '--language', default='en-US')
    parser.add_argument('--note', default='')
    args = parser.parse_args()

    query = ' '.join(args.query).strip()
    ts = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    run_id = f'{ts}-{slugify(query)}'
    run_dir = RUNS / run_id

    search_json = run_cmd([
        str(SEARCH), '--json', '-c', args.category, '-n', str(args.limit), '-l', args.language, query
    ])
    try:
        search_obj = json.loads(search_json)
    except json.JSONDecodeError as exc:
        raise SystemExit(f'{SEARCH.name} returned invalid JSON: {exc}') from exc
    if not isinstance(search_obj, dict):
        raise SystemExit(f'{SEARCH.name} returned unexpected JSON (expected an object)')

    payload = {
        'runId': run_id,
        'createdAt': dt.datetime.now(dt.timezone.utc).isoformat(),
        'query': query,
        'category': args.category,
        'language': args.language,
        'note': args.note,
        'resultCount': search_obj.get('resultCount', 0),
        'unresponsiveEngines': search_obj.get('unresponsiveEngines', []),
        'results': search_obj.get('results', []),
    }

    # Create the directory only once there is something to store, so a
    # failed search does not leave an empty run directory behind.
    run_dir.mkdir(parents=True, exist_ok=True)
    (run_dir / 'search.json').write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding='utf-8')
    (run_dir / 'report.md').write_text(_render_report(payload), encoding='utf-8')
    print(str(run_dir))
    return 0


# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
