"""
oryza19k — a client-side *access cookbook* for the 19K Rice Genome Project (19K-RGP).

This is NOT a server or a "unified API". It is a thin Python helper that orchestrates
the data platforms' *existing* interfaces so a user can go, in one place, from a gene
of interest -> genotypes -> predicted effects -> climate association -> a trait prediction.

Honest access tiers (read before relying on any function):
  * Ensembl REST (GrameneOryza is Ensembl-based) ...... DOCUMENTED & STABLE   -> lookup_gene, region_features, vep_effects
  * Remote tabix streaming of bgzipped VCFs ........... STANDARD (needs server HTTP-range) -> region_genotypes(source="tabix")
  * SNP-Seek REST .................................... EXISTS (Mansueto 2017); exact paths
                                                       MUST BE CONFIRMED with the IRRI team -> region_genotypes(source="snpseek")  [TODO-CONFIRM]
  * Oryza CLIMtools .................................. NO REST API; downloadable tables only -> climate_for_gene, accession_climate
  * Pre-trained AI models (GitHub) ................... joblib pickle + imputer            -> predict_trait

Design guarantees:
  * Every networked call is wrapped so a failure raises a clear, actionable message
    (and notebooks fall back to a shipped precomputed table -> "always runs for a reviewer").
  * No credentials are ever embedded. Review-only endpoints are never hard-coded here.

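Dependencies: pandas and numpy are required. requests is recommended but optional (the HTTP
helpers fall back to urllib); joblib is needed only by predict_trait. Optional: pysam (tabix;
the `tabix` CLI is tried if pysam is absent), pyarrow (fast wide-CSV / parquet). Optional
packages are imported lazily, so the module itself imports without them.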

Authors: 19K-RGP team. License: MIT (matches the code repository).
"""
from __future__ import annotations

import io
import os
import sys
import time
import json
import subprocess
import urllib.request
import urllib.parse
import urllib.error
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Optional, Sequence, Union

import numpy as np
import pandas as pd

try:
    import requests
except Exception:  # pragma: no cover - requests is expected, but keep import-safe
    requests = None

# Names exported by a star-import of this module: the configuration/registry
# objects first, then the user-facing query, prediction and lookup functions.
# Helpers whose names start with an underscore are intentionally left out.
__all__ = [
    "CONFIG", "REFERENCES", "TRAITS", "MODELS",
    "lookup_gene", "region_features", "vep_effects",
    "region_genotypes", "climate_for_gene", "accession_climate",
    "predict_trait", "summary_table", "choose_platform",
]

# ---------------------------------------------------------------------------
# Configuration (no credentials; override via environment or by editing CONFIG)
# ---------------------------------------------------------------------------

def _default_data_dir() -> Path:
    """Return ORYZA19K_DATA_DIR if set, else ``precomputed_tables`` one level above this file's folder."""
    bundled = Path(__file__).resolve().parent.parent / "precomputed_tables"
    return Path(os.environ.get("ORYZA19K_DATA_DIR", bundled))


def _default_models_dir() -> Path:
    """Return ORYZA19K_MODELS_DIR if set to a non-empty value, else the repo's model folder name."""
    override = os.environ.get("ORYZA19K_MODELS_DIR")
    if override:
        return Path(override)
    return Path("AI-drive Predictive Phenotype Modeling")


@dataclass
class Config:
    """Endpoints, paths and request settings used by the helpers (never credentials).

    The string fields that honour an ``ORYZA19K_*`` variable read the environment
    once, when this class body is executed; the two ``Path`` fields read it each
    time an instance is created.
    """

    # Base URL of the Ensembl REST service — the stable, documented backbone
    # for gene / region / VEP queries.
    ensembl_rest: str = os.environ.get("ORYZA19K_ENSEMBL_REST", "https://rest.ensembl.org")
    ensembl_species: str = "oryza_sativa"

    # SNP-Seek REST base only. The genotype/variety/SNP routes are described in
    # Mansueto et al. 2017 (NAR), but the exact paths/params must be confirmed
    # with the IRRI team before publication.
    snpseek_base: str = os.environ.get("ORYZA19K_SNPSEEK_BASE", "https://snp-seek.irri.org")

    # Public, anonymous VCF location for tabix streaming. The review build uses a
    # credentialed dev FTP which is deliberately NOT placed here: set this to the
    # public endpoint at publication, or pass vcf_url=... explicitly.
    vcf_base: str = os.environ.get("ORYZA19K_VCF_BASE", "")  # e.g. https://<public-host>/19K-RGP/<ref>/

    # Local fallback data shipped with the package (precomputed tables, CLIMtools exports).
    data_dir: Path = field(default_factory=_default_data_dir)

    # Folder holding the pre-trained models (a clone of github.com/YongZhou2019/19K-RGP).
    models_dir: Path = field(default_factory=_default_models_dir)

    # Optional Zenodo record for precomputed tables (filled in at publication).
    zenodo_record: str = os.environ.get("ORYZA19K_ZENODO", "")

    # Per-request timeout passed to the HTTP calls, and the number of attempts
    # made by the JSON fetcher.
    request_timeout: int = 30
    max_retries: int = 3


# Module-wide settings instance read by the helpers below. Built once at import
# time; reconfigure by assigning to its attributes (e.g. CONFIG.vcf_base = ...) or
# by setting the ORYZA19K_* environment variables before this module is imported.
CONFIG = Config()

# The five platinum reference genomes (verbatim from the manuscript, lines 245-250).
# Keyed by reference name; each entry records the source variety, a group code and
# the assembly accession. region_genotypes() rejects any `ref` that is not a key
# here, and the key is embedded in the default tabix VCF file name.
REFERENCES = {
    "IRGSP-1.0": {"variety": "Nipponbare", "group": "GJ", "accession": "GCA_001433935.1"},
    "IR64RS2":   {"variety": "IR64",       "group": "XI", "accession": "this study (gap-free)"},
    "MH63RS3":   {"variety": "Minghui 63", "group": "XI", "accession": "CP054676-CP054688"},
    "ARC10497":  {"variety": "ARC 10497",  "group": "cB", "accession": "GCA_009831255.1"},
    "N22":       {"variety": "Nagina 22",  "group": "cA", "accession": "GCA_001952365.2"},
}

# The five AI-predicted agronomic traits (short codes used across the project),
# each mapped to a human-readable description.
# NOTE: predict_trait() validates its `trait` argument against MODELS, not against
# this table, so a code listed here may not have a model registered yet.
TRAITS = {
    "hdg_80head": "Heading date (days to 80% heading)",
    "grwt100":    "100-grain weight",
    "grlt":       "Grain length",
    "grwd":       "Grain width",
    "glrw.ratio": "Grain length-to-width ratio",
}

# Registry of pre-trained models shipped in the repo. NOTE: at the time of writing
# only the heading-date model is public; the other four are not registered yet — a
# commented placeholder marks where they go once uploaded (see plan, open item #4).
#
# Per-trait fields, as consumed by predict_trait():
#   "subdir"   folder below the models directory that holds the files
#   "model"    file name of the joblib-pickled estimator (required)
#   "imputer"  file name of the joblib-pickled imputer (used only if the file exists)
#   "features" CSV listing the expected feature names (used only if the file exists)
#   "status"   informational label; not read by the code in this module
MODELS = {
    "hdg_80head": {
        "model": "best_trained_model_hdg_80head2025.pkl",
        "imputer": "most_freq_imputer_hdg_80head2025.pkl",
        "features": "combined_selected_features_top_1000.csv",
        "subdir": "Prediction_on_new_samples/hdg_80head2025",
        "status": "public",
    },
    # "grwt100": {... to be added once uploaded ...},
}

# Column names tried, in this order, when looking for the accession-identifier
# column of a table (used by _detect_id_col and _genotypes_local).
ID_ALIASES = ("ID", "IID", "20K_VCF_Name", "accession", "Accession")


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _http_json(url: str, method: str = "GET", payload: Optional[dict] = None) -> Union[dict, list]:
    """Fetch and decode a JSON document, retrying when the server rate-limits.

    Uses `requests` when it is installed and the stdlib `urllib` otherwise, so the
    helper has no hard dependency on `requests`.

    Parameters
    ----------
    url : fully-formed URL (query string already attached).
    method : HTTP verb, e.g. "GET" or "POST".
    payload : optional object sent as a JSON request body.

    Returns
    -------
    The decoded JSON body (a dict or a list).

    Raises
    ------
    RuntimeError
        On any failure: a non-429 HTTP error, a transport/decoding error, or
        HTTP 429 on every one of the ``CONFIG.max_retries`` attempts. The
        underlying exception is chained as ``__cause__``.
    """
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    body = json.dumps(payload).encode() if payload is not None else None

    def _retry_delay(raw) -> float:
        # Retry-After may be missing or not a plain number of seconds (e.g. an
        # HTTP date); fall back to the 1 s default rather than crash the retry loop.
        try:
            return max(float(raw), 0.0) + 0.5
        except (TypeError, ValueError):
            return 1.5

    attempts = CONFIG.max_retries
    last_err = None
    for attempt in range(attempts):
        retry_after = None
        try:
            if requests is not None:
                r = requests.request(method, url, headers=headers, data=body,
                                     timeout=CONFIG.request_timeout)
                if r.status_code != 429:
                    r.raise_for_status()
                    return r.json()
                # Remember the rate-limit so an exhausted retry budget is reported clearly.
                last_err = RuntimeError("HTTP 429 Too Many Requests")
                retry_after = r.headers.get("Retry-After")
            else:
                req = urllib.request.Request(url, data=body, headers=headers, method=method)
                with urllib.request.urlopen(req, timeout=CONFIG.request_timeout) as resp:
                    return json.loads(resp.read().decode())
        except urllib.error.HTTPError as e:  # urllib path
            last_err = e
            if e.code != 429:
                break
            retry_after = e.headers.get("Retry-After") if e.headers else None
        except Exception as e:
            last_err = e
            break
        # Only a 429 reaches this point; there is no reason to wait after the last attempt.
        if attempt + 1 < attempts:
            time.sleep(_retry_delay(retry_after))

    if last_err is None:
        detail = "no request attempted; CONFIG.max_retries must be >= 1"
    else:
        detail = f"{type(last_err).__name__}: {last_err}"
    raise RuntimeError(f"HTTP {method} failed: {url} ({detail})") from last_err


def _ensembl_get(path: str, params: Optional[dict] = None) -> Union[dict, list]:
    """Issue a GET against the configured Ensembl REST base and return the decoded JSON.

    `path` is appended verbatim to ``CONFIG.ensembl_rest``. When `params` is
    non-empty it is URL-encoded and attached with "?" (or "&" if the path
    already carries a query string).
    """
    endpoint = f"{CONFIG.ensembl_rest}{path}"
    if not params:
        return _http_json(endpoint, "GET")
    joiner = "?" if "?" not in endpoint else "&"
    query = urllib.parse.urlencode(params)
    return _http_json(f"{endpoint}{joiner}{query}", "GET")


def _ensembl_post(path: str, payload: dict) -> Union[dict, list]:
    """POST `payload` as JSON to an Ensembl REST path and return the decoded response."""
    endpoint = f"{CONFIG.ensembl_rest}{path}"
    return _http_json(endpoint, method="POST", payload=payload)


def _detect_id_col(df: pd.DataFrame) -> str:
    """Pick the column that holds accession identifiers.

    Preference order: the first name from ID_ALIASES present in `df`, then the
    first column whose dtype is not numeric, then simply the first column.
    """
    columns = df.columns
    known = next((alias for alias in ID_ALIASES if alias in columns), None)
    if known is not None:
        return known
    # No conventional id name: an id column is expected to be non-numeric.
    for name in columns:
        if not pd.api.types.is_numeric_dtype(df[name]):
            return name
    return columns[0]


def _detect_pheno_col(df: pd.DataFrame, trait: Optional[str]) -> Optional[str]:
    """Return the name of the phenotype column in `df`, or None if there is none.

    A column named after `trait` wins; otherwise the generic names
    "Phenotype", "phenotype" and "y" are tried in that order.
    """
    candidates = ([trait] if trait else []) + ["Phenotype", "phenotype", "y"]
    return next((name for name in candidates if name in df.columns), None)


# ---------------------------------------------------------------------------
# 1) GrameneOryza / Ensembl REST  (DOCUMENTED & STABLE)
# ---------------------------------------------------------------------------

def lookup_gene(gene: str, species: Optional[str] = None, expand: bool = True) -> dict:
    """Resolve a rice gene id or symbol to its coordinates via Ensembl REST.

    The stable-id endpoint (``/lookup/id``) is tried first; if that request fails
    for any reason, the symbol endpoint (``/lookup/symbol/<species>``) is tried.
    An error from the second request propagates to the caller.

    Parameters
    ----------
    gene : stable id or gene symbol. It is percent-encoded before being placed
        in the URL path, so symbols containing "/", spaces, etc. are safe.
    species : Ensembl species name; defaults to ``CONFIG.ensembl_species``.
    expand : sent as ``expand=1`` / ``expand=0``.

    Returns
    -------
    The decoded JSON record returned by Ensembl.

    >>> g = lookup_gene("Os03t0752800")   # id from the original example — TODO confirm id/label
    >>> g["seq_region_name"], g["start"], g["end"]
    """
    species = species or CONFIG.ensembl_species
    params = {"expand": int(bool(expand))}
    # Escape the user-supplied token so it cannot alter the request path.
    token = urllib.parse.quote(str(gene), safe="")
    # try id first, then symbol
    try:
        return _ensembl_get(f"/lookup/id/{token}", params)
    except Exception:
        return _ensembl_get(f"/lookup/symbol/{species}/{token}", params)


def region_features(region: str, species: Optional[str] = None,
                    feature: Union[str, Sequence[str]] = "gene") -> pd.DataFrame:
    """Features overlapping a region, e.g. region='3:1000000-1100000', feature='variation'.

    Parameters
    ----------
    region : Ensembl region string ``chrom:start-end``.
    species : Ensembl species name; defaults to ``CONFIG.ensembl_species``.
    feature : one feature type or a sequence of them; each is sent as a
        repeated, URL-encoded ``feature=`` query parameter.

    Returns
    -------
    A DataFrame built directly from the JSON returned by ``/overlap/region``.
    """
    species = species or CONFIG.ensembl_species
    feats = [feature] if isinstance(feature, str) else list(feature)
    # A list of pairs (not a dict) keeps one `feature=` entry per requested type.
    query = urllib.parse.urlencode([("feature", f) for f in feats])
    data = _ensembl_get(f"/overlap/region/{species}/{region}?{query}")
    return pd.DataFrame(data)


def vep_effects(variants: Union[str, Sequence[str]], species: Optional[str] = None) -> pd.DataFrame:
    """Predicted consequences via the Ensembl VEP GET 'region' endpoint (verified format).

    Each variant is 'chrom:pos:alt' (SNV) or 'chrom:start-end:alt'. A single
    position is expanded to 'pos-pos' for the request. One GET is issued per variant.

    Parameters
    ----------
    variants : one variant string or a sequence of them.
    species : Ensembl species name; defaults to ``CONFIG.ensembl_species``.

    Returns
    -------
    One row per transcript consequence with columns ``input``, ``most_severe``,
    ``gene_id``, ``transcript_id``, ``consequence`` and ``impact``. A record
    without transcript consequences still yields one row (transcript fields None).
    An empty input yields an empty DataFrame.

    Raises
    ------
    ValueError
        If any variant lacks a chromosome, locus or allele. All variants are
        checked before the first network request is made.

    >>> vep_effects("3:31037240:A")   # MADS14 intron variant
    """
    species = species or CONFIG.ensembl_species
    items = [variants] if isinstance(variants, str) else list(variants)

    # Validate everything first so a typo fails fast instead of after N requests.
    paths = []
    for v in items:
        parts = str(v).split(":")
        if len(parts) < 3 or not all(parts[:3]):
            raise ValueError(
                f"Malformed variant {v!r}: expected 'chrom:pos:alt' or 'chrom:start-end:alt'.")
        chrom, locus, allele = parts[:3]
        region = locus if "-" in locus else f"{locus}-{locus}"
        paths.append(f"/vep/{species}/region/{chrom}:{region}/{allele}")

    rows = []
    for path in paths:
        data = _ensembl_get(path)
        for rec in (data if isinstance(data, list) else [data]):
            # `or [{}]` keeps one row for records that have no transcript consequences.
            for cons in rec.get("transcript_consequences", []) or [{}]:
                rows.append({
                    "input": rec.get("input"),
                    "most_severe": rec.get("most_severe_consequence"),
                    "gene_id": cons.get("gene_id"),
                    "transcript_id": cons.get("transcript_id"),
                    "consequence": ",".join(cons.get("consequence_terms", []) or []),
                    "impact": cons.get("impact"),
                })
    return pd.DataFrame(rows)


# ---------------------------------------------------------------------------
# 2) Genotypes — remote tabix (STANDARD) | SNP-Seek REST (CONFIRM) | local (offline)
# ---------------------------------------------------------------------------

def region_genotypes(chrom: str, start: int, end: int, ref: str = "IRGSP-1.0",
                     source: str = "tabix", vcf_url: Optional[str] = None,
                     accessions: Optional[Iterable[str]] = None,
                     local_parquet: Optional[Union[str, Path]] = None) -> pd.DataFrame:
    """Variant data for a locus across the 19K-RGP, from one of three backends.

    source="tabix"   : stream the region from a bgzipped+indexed VCF over HTTPS
                       (never downloads the whole 19K dataset). As implemented in
                       this module the result holds site columns only
                       (chrom, pos, id, ref, alt); `accessions` is not applied.
    source="snpseek" : SNP-Seek REST genotype-by-region  [TODO-CONFIRM exact path
                       with IRRI]; currently raises NotImplementedError.
    source="local"   : slice a local parquet/CSV genotype matrix (offline fallback),
                       optionally restricted to `accessions`.

    Raises ValueError for an unknown `ref` (checked first) or an unknown `source`.
    """
    if ref not in REFERENCES:
        raise ValueError(f"Unknown reference '{ref}'. Choose from {list(REFERENCES)}.")

    # Reject an unsupported backend up front, then dispatch.
    if source not in ("tabix", "snpseek", "local"):
        raise ValueError("source must be one of {'tabix','snpseek','local'}")

    if source == "local":
        return _genotypes_local(chrom, start, end, local_parquet, accessions)
    if source == "snpseek":
        return _genotypes_snpseek(chrom, start, end, ref, accessions)
    return _genotypes_tabix(chrom, start, end, ref, vcf_url)


def _genotypes_tabix(chrom, start, end, ref, vcf_url) -> pd.DataFrame:
    """Stream a VCF region via pysam (preferred) or the `tabix` CLI.

    Parameters
    ----------
    chrom, start, end : region to fetch; `start`/`end` are 1-based inclusive
        (converted to pysam's 0-based half-open interval internally).
    ref : reference name, used only to build the default VCF file name.
    vcf_url : explicit VCF location; when None it is derived from ``CONFIG.vcf_base``.

    Returns
    -------
    One row per VCF record with site-level columns (chrom, pos, ref, alt, id);
    per-sample genotype calls are not extracted by either code path.

    Raises
    ------
    RuntimeError
        If no VCF URL is configured, if neither pysam nor `tabix` is available,
        or if the `tabix` command exits with an error (its stderr is included).
    """
    if vcf_url is None:
        if not CONFIG.vcf_base:
            raise RuntimeError(
                "No VCF URL. Set CONFIG.vcf_base to the PUBLIC, anonymous bgzipped VCF host "
                "(review credentials must never be hard-coded), or pass vcf_url=...")
        vcf_url = f"{CONFIG.vcf_base.rstrip('/')}/19K-RGP.{ref}.snps.vcf.gz"
    region = f"{chrom}:{start}-{end}"

    # Prefer pysam (Python, HTTP range requests)
    try:
        import pysam  # type: ignore
    except ImportError:
        pysam = None

    if pysam is not None:
        rows = []
        with pysam.VariantFile(vcf_url) as vf:
            for rec in vf.fetch(chrom, start - 1, end):
                rows.append({"chrom": rec.chrom, "pos": rec.pos, "ref": rec.ref,
                             "alt": ",".join(map(str, rec.alts or [])), "id": rec.id})
        return pd.DataFrame(rows)

    # Fall back to the tabix CLI
    try:
        out = subprocess.run(["tabix", "-h", vcf_url, region],
                             capture_output=True, text=True, check=True).stdout
    except FileNotFoundError as e:
        raise RuntimeError(
            "Neither pysam nor the `tabix` CLI is available. Install one "
            "(`pip install pysam` or `conda install -c bioconda htslib`).") from e
    except subprocess.CalledProcessError as e:
        # Surface tabix's own diagnostics instead of a bare non-zero exit status.
        detail = (e.stderr or "").strip() or f"exit status {e.returncode}"
        raise RuntimeError(
            f"`tabix` could not read region {region} from {vcf_url}: {detail}") from e
    return _parse_vcf_text(out)


def _parse_vcf_text(text: str) -> pd.DataFrame:
    """Turn VCF text into a site table with columns chrom, pos, id, ref, alt.

    Meta lines ("##...") and the "#CHROM" header are skipped, as is any line
    with fewer than eight tab-separated fields. Sample columns are ignored.
    """
    records = []
    for raw in text.splitlines():
        if raw.startswith(("##", "#CHROM")):
            continue
        cols = raw.split("\t")
        if len(cols) >= 8:
            chrom, pos, vid, ref, alt = cols[:5]
            records.append({"chrom": chrom, "pos": int(pos), "id": vid, "ref": ref, "alt": alt})
    return pd.DataFrame(records)


def _genotypes_snpseek(chrom, start, end, ref, accessions) -> pd.DataFrame:
    """Placeholder for SNP-Seek REST genotype-by-region; always raises.

    The intended route follows the SNP-Seek II design (Mansueto et al. 2017), but
    the EXACT endpoint path and parameters must be confirmed with the IRRI team
    before it can be relied upon. Until then use source="tabix" or source="local".
    All arguments are accepted for signature parity and are currently unused.
    """
    reason = ("SNP-Seek REST endpoint paths are pending confirmation with the IRRI team "
              "(McNally/Mauleon/Chebotarov). ")
    advice = "Use source='tabix' or source='local' meanwhile."
    raise NotImplementedError(reason + advice)


def _genotypes_local(chrom, start, end, local_parquet, accessions) -> pd.DataFrame:
    """Slice a local genotype matrix (columns named '<chrom>_<pos>_<ref>_<alt>_<call>').

    Parameters
    ----------
    chrom : chromosome prefix of the SNP column names (matched as ``f"{chrom}_"``).
    start, end : inclusive position bounds compared against the position parsed
        from each column name.
    local_parquet : path to a ``.parquet`` file or a delimited text file
        (``.tsv``/``.tab`` are read tab-separated, anything else comma-separated).
        Defaults to ``CONFIG.data_dir / "genotypes_with_phenos.parquet"``.
    accessions : optional ids; rows are kept when the id column (as str) matches.

    Returns
    -------
    The id column followed by the SNP columns inside the region, index reset.

    Raises
    ------
    FileNotFoundError if the matrix is missing; ImportError (with an install
    hint) if a parquet file is requested and pyarrow is not installed.
    """
    path = Path(local_parquet) if local_parquet else (
        CONFIG.data_dir / "genotypes_with_phenos.parquet")
    if not path.exists():
        raise FileNotFoundError(f"Local genotype matrix not found: {path}")

    is_parquet = path.suffix == ".parquet"
    # Same suffix convention as _read_table, so TSV exports are parsed correctly.
    sep = "\t" if path.suffix in (".tsv", ".tab") else ","

    # Read only the header first so just the columns in the region are loaded.
    if is_parquet:
        try:
            import pyarrow.parquet as pq
        except ImportError as e:
            raise ImportError(
                "Reading a parquet genotype matrix requires pyarrow "
                "(`pip install pyarrow`); alternatively pass a CSV/TSV file.") from e
        names = pq.ParquetFile(path).schema_arrow.names
    else:
        names = pd.read_csv(path, sep=sep, nrows=0).columns.tolist()

    id_col = next((c for c in ID_ALIASES if c in names), names[0])
    prefix = f"{chrom}_"
    # `c != id_col` prevents selecting the id column twice when it is a fallback pick.
    want = [id_col] + [c for c in names
                       if c != id_col and c.startswith(prefix)
                       and start <= _col_pos(c) <= end]

    if is_parquet:
        df = pd.read_parquet(path, columns=want)
    else:
        df = pd.read_csv(path, sep=sep, usecols=want)
    if accessions is not None:
        df = df[df[id_col].astype(str).isin({str(a) for a in accessions})]
    return df.reset_index(drop=True)


def _col_pos(col: str) -> int:
    """Return the position encoded in a SNP column name such as 'Chr01_9233_T_C_C'.

    The name is split on "_"; the first all-digit token after the leading one is
    the position. Returns -1 when no such token exists.
    """
    tokens = col.split("_")[1:]
    return next((int(tok) for tok in tokens if tok.isdigit()), -1)


# ---------------------------------------------------------------------------
# 3) Oryza CLIMtools  (NO REST API — downloadable tables / local exports)
# ---------------------------------------------------------------------------

def climate_for_gene(gene: str, group: str = "XI",
                     table: Optional[Union[str, Path]] = None) -> pd.DataFrame:
    """Climate variables associated with a gene (Oryza GenoCLIM result table).

    CLIMtools has no programmatic API; this reads an exported/precomputed GenoCLIM
    table (CSV/TSV). Ship the relevant export under precomputed_tables/ or pass `table`.

    Parameters
    ----------
    gene : text searched for, case-insensitively and as a literal substring
        (not a regular expression), in the table's "gene" column.
    group : kept rows must equal this value (case-insensitive) in the "group"
        column; pass an empty string to skip the group filter.
    table : path to the export; defaults to ``CONFIG.data_dir / "climtools_genoclim.tsv"``.

    Returns
    -------
    The filtered table with a fresh index. A filter is silently skipped when its
    column ("gene" / "group", matched case-insensitively) is absent.
    """
    path = Path(table) if table else (CONFIG.data_dir / "climtools_genoclim.tsv")
    df = _read_table(path)
    cols = {c.lower(): c for c in df.columns}
    if "gene" in cols:
        # regex=False: gene ids may contain ".", "(", "+" ... which must match literally.
        hits = df[cols["gene"]].astype(str).str.contains(gene, case=False, na=False, regex=False)
        df = df[hits]
    if "group" in cols and group:
        df = df[df[cols["group"]].astype(str).str.upper() == group.upper()]
    return df.reset_index(drop=True)


def accession_climate(accessions: Optional[Iterable[str]] = None,
                      variables: Sequence[str] = ("BIO6",),
                      table: Optional[Union[str, Path]] = None) -> pd.DataFrame:
    """Per-accession geo-environmental values (OryzaCLIM export).

    Parameters
    ----------
    accessions : optional ids; rows are kept when the id column (as str) matches.
    variables : column name(s) to keep next to the id column. A single name may
        be given as a plain string. Names absent from the table are ignored; if
        none of them is present the full table is returned.
    table : path to the export; defaults to
        ``CONFIG.data_dir / "oryzaclim_accession_climate.tsv"``.

    Returns
    -------
    The selected columns/rows with a fresh index.
    """
    path = Path(table) if table else (CONFIG.data_dir / "oryzaclim_accession_climate.tsv")
    df = _read_table(path)
    id_col = _detect_id_col(df)
    # A bare string would otherwise be iterated character by character.
    wanted = [variables] if isinstance(variables, str) else list(variables)
    keep = [id_col] + [v for v in wanted if v in df.columns]
    df = df[keep] if len(keep) > 1 else df
    if accessions is not None:
        df = df[df[id_col].astype(str).isin({str(a) for a in accessions})]
    return df.reset_index(drop=True)


# ---------------------------------------------------------------------------
# 4) Pre-trained AI trait prediction  (joblib pickle + imputer; top-1000 features)
# ---------------------------------------------------------------------------

def predict_trait(genotype_df: pd.DataFrame, trait: str = "hdg_80head",
                  models_dir: Optional[Union[str, Path]] = None,
                  return_frame: bool = True):
    """Predict an agronomic trait from a genotype matrix using a pre-trained model.

    Mirrors the repo's deployment interface (heading_date_predictor.py):
      model = joblib.load(<model>.pkl); X = df.drop([id, pheno]).values.astype(float64)
      y_pred = model.predict(X)
    The model expects the SHAP-selected top-1000 features; when the feature list
    is shipped, the input columns are aligned to it by name (absent features
    become missing values) and missing values are imputed.

    Parameters
    ----------
    genotype_df : one row per accession; the id column and, if present, the
        phenotype column are detected and excluded from the features. The
        caller's frame is not modified.
    trait : key into MODELS.
    models_dir : folder containing the model sub-directories; defaults to
        ``CONFIG.models_dir``.
    return_frame : if True return a DataFrame ``{<id column>, "<trait>_pred"}``,
        otherwise the raw prediction array.

    Raises
    ------
    ValueError if no model is registered for `trait`; FileNotFoundError if the
    model file is missing.

    SECURITY: the model and imputer are unpickled with ``joblib.load``, which can
    execute arbitrary code. Only point `models_dir` at files you trust.
    """
    import joblib
    if trait not in MODELS:
        raise ValueError(
            f"No pre-trained model registered for '{trait}'. Available: {list(MODELS)}. "
            "(Only the heading-date model is public so far; see plan open item #4.)")
    spec = MODELS[trait]
    base = Path(models_dir) if models_dir else CONFIG.models_dir
    mdir = base / spec["subdir"]
    model_path, feat_path = mdir / spec["model"], mdir / spec["features"]
    imp_path = mdir / spec["imputer"]
    if not model_path.exists():
        raise FileNotFoundError(
            f"Model not found: {model_path}. Clone github.com/YongZhou2019/19K-RGP and set "
            "models_dir to its 'AI-drive Predictive Phenotype Modeling' folder.")

    id_col = _detect_id_col(genotype_df)
    ids = genotype_df[id_col].astype(str).values
    pheno_col = _detect_pheno_col(genotype_df, trait)
    drop = [id_col] + ([pheno_col] if pheno_col is not None else [])
    # drop() already returns a new frame, so no up-front copy of the (wide) input is needed.
    feat_df = genotype_df.drop(columns=drop, errors="ignore")

    # align to the model's expected top-1000 features (by name), if shipped
    if feat_path.exists():
        feats = pd.read_csv(feat_path)
        feat_names = feats.iloc[:, 0].astype(str).tolist() if feats.shape[1] == 1 else list(feats.columns)
        # reindex selects/reorders in one step and fills absent features with NaN
        # (avoids inserting hundreds of columns one at a time).
        feat_df = feat_df.reindex(columns=feat_names)

    # -9 is treated as the missing-genotype code.
    X = feat_df.replace(-9, np.nan).values.astype("float64")
    if imp_path.exists():
        X = joblib.load(imp_path).transform(X)
    else:
        # simple most-frequent fallback if the imputer isn't present
        gaps = np.isnan(X)
        if gaps.any():
            modes = pd.DataFrame(X).mode(axis=0)
            if len(modes):
                col_mode = modes.iloc[0].to_numpy()
                # Columns with no observed value have a NaN mode and stay NaN.
                X = np.where(gaps, col_mode, X)

    model = joblib.load(model_path)
    y_pred = model.predict(X)
    if not return_frame:
        return y_pred
    return pd.DataFrame({id_col: ids, f"{trait}_pred": y_pred})


# ---------------------------------------------------------------------------
# 5) Precomputed summaries  (local fallback now; Zenodo DOI at publication)
# ---------------------------------------------------------------------------

def summary_table(name: str, source: str = "local") -> pd.DataFrame:
    """Load a precomputed summary table by short name (offline-friendly).

    Short names: 'accessions', 'phenotypes', 'allele_freq_by_group',
    'gene_variant_summary', 'benchmark', 'shap_to_gene', 'mads_haplotypes'.
    Any other `name` is used as a file name: kept as-is when it ends in
    .tsv/.csv/.parquet, otherwise ".tsv" is appended.

    source="local" reads from ``CONFIG.data_dir``; source="zenodo" downloads from
    the record in ``CONFIG.zenodo_record`` (RuntimeError if that is unset).
    Any other `source` raises ValueError.
    """
    short_names = {
        "accessions": "accession_passport.tsv",
        "phenotypes": "phenotypes_by_accession.tsv",
        "allele_freq_by_group": "allele_frequency_core_snps.tsv",
        "gene_variant_summary": "gene_variant_summary.tsv",
        "benchmark": "genomic_prediction_benchmark.tsv",
        "shap_to_gene": "shap_top_snp_to_gene.tsv",
        "mads_haplotypes": "mads14_mads50_haplotypes.tsv",
    }
    literal = name if name.endswith((".tsv", ".csv", ".parquet")) else f"{name}.tsv"
    fname = short_names.get(name, literal)

    if source == "zenodo":
        record = CONFIG.zenodo_record
        if not record:
            raise RuntimeError("CONFIG.zenodo_record is not set (DOI minted at publication).")
        return _read_table_url(f"https://zenodo.org/record/{record}/files/{fname}?download=1")
    if source == "local":
        return _read_table(CONFIG.data_dir / fname)
    raise ValueError("source must be 'local' or 'zenodo'")


# ---------------------------------------------------------------------------
# 6) "Choose the right platform" router
# ---------------------------------------------------------------------------

# Rows of the "which platform should I use?" table served by choose_platform().
# Each tuple is (question, platform, method, precomputed_table), in the same order
# as the column names assigned there. The last field is either a short name from
# summary_table()'s catalog or "—" where no table name is given.
_DECISION = [
    ("Inspect genotypes/haplotypes/allele freq for a small region, all accessions",
     "SNP-Seek v3", "genotype viewer / REST", "allele_freq_by_group"),
    ("Browse a genome, gene models, predicted variant effects",
     "GrameneOryza", "Ensembl REST: lookup_gene / vep_effects", "gene_variant_summary"),
    ("Slice a locus across all 19,035 genomes without downloading",
     "Gramene FTP", "region_genotypes(source='tabix')", "—"),
    ("Climate <-> genome (G x E) associations for a gene",
     "Oryza CLIMtools", "climate_for_gene (table)", "—"),
    ("Predict an agronomic trait from genotypes",
     "Code & Models", "predict_trait", "benchmark"),
    ("Get raw reads / full variant archive / a citable DOI",
     "Archives (NCBI/EVA/KAUST)", "accession download", "accessions"),
]


def choose_platform(as_frame: bool = True):
    """Return the question -> platform -> method -> precomputed-table decision table.

    With ``as_frame=True`` (default) the result is a DataFrame with columns
    question, platform, method, precomputed_table. Otherwise it is a list of
    4-tuples in that order; the list is a fresh copy, so modifying it does not
    affect later calls.
    """
    if not as_frame:
        # Copy: handing out the module-level list would let callers mutate shared state.
        return list(_DECISION)
    cols = ["question", "platform", "method", "precomputed_table"]
    return pd.DataFrame(_DECISION, columns=cols)


# ---------------------------------------------------------------------------
# Small IO utilities
# ---------------------------------------------------------------------------

def _read_table(path: Union[str, Path]) -> pd.DataFrame:
    """Load a local table, choosing the reader from the file suffix.

    ".parquet" goes through ``pd.read_parquet``; ".tsv"/".tab" are read
    tab-separated; every other suffix is read comma-separated.
    Raises FileNotFoundError (with a hint on how to obtain the table) if the
    file does not exist.
    """
    target = Path(path)
    if not target.exists():
        raise FileNotFoundError(
            f"Precomputed table not found: {target}. Generate it (see precomputed_tables/) "
            "or fetch from Zenodo once the DOI is minted.")
    ext = target.suffix
    if ext == ".parquet":
        return pd.read_parquet(target)
    delimiter = "," if ext not in (".tsv", ".tab") else "\t"
    return pd.read_csv(target, sep=delimiter)


def _read_table_url(url: str) -> pd.DataFrame:
    """Download a table and parse it according to the file name in the URL.

    The body is fetched as raw bytes (via `requests` if installed, else `urllib`)
    so both transports decode identically: delimited text is parsed by pandas as
    UTF-8, and a ".parquet" name is handed to ``pd.read_parquet``. The part of
    the URL before "?" decides the format: ".parquet", ".tsv"/".tab"
    (tab-separated), or anything else (comma-separated).

    HTTP/transport errors from the underlying library propagate unchanged.
    """
    if requests is not None:
        r = requests.get(url, timeout=CONFIG.request_timeout)
        r.raise_for_status()
        raw = r.content
    else:
        with urllib.request.urlopen(url, timeout=CONFIG.request_timeout) as resp:
            raw = resp.read()

    name = url.split("?")[0]
    if name.endswith(".parquet"):
        # Parquet is binary; it cannot go through the text/CSV path.
        return pd.read_parquet(io.BytesIO(raw))
    sep = "\t" if name.endswith((".tsv", ".tab")) else ","
    return pd.read_csv(io.BytesIO(raw), sep=sep)


# ---------------------------------------------------------------------------
# Smoke test (runs fully offline)
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    # Offline smoke test: print the active configuration and the decision table.
    print("oryza19k — access cookbook for the 19K-RGP")
    overview = (
        ("Ensembl REST", CONFIG.ensembl_rest),
        ("data_dir", CONFIG.data_dir),
        ("references", ", ".join(REFERENCES)),
        ("traits", ", ".join(TRAITS)),
    )
    for label, value in overview:
        # Labels are left-padded to 12 characters so the colons line up.
        print(f"  {label:<12} : {value}")
    print("\nChoose-the-platform decision table:")
    decision_text = choose_platform().to_string(index=False)
    print(decision_text)
    print("\nThis module is import-safe and offline-friendly. Live functions require network "
          "(Ensembl REST) or local data (parquet / precomputed tables).")
