Source code for pylmrob.summary

# SPDX-License-Identifier: GPL-3.0-or-later
"""``summary.lmrob``-equivalent output for ``LmRobResults``.

Mirrors ``robustbase:::summary.lmrob`` and ``robustbase:::print.summary.lmrob``:

- Coefficient table with ``Estimate``, ``Std. Error``, ``t value``, ``Pr(>|t|)``
  using the t-distribution with ``df.residual`` degrees of freedom.
- Robust residual standard error = ``scale``.
- Robust multiple R-squared and adjusted R-squared computed from the rweights-
  weighted total and residual sum-of-squares with the family-specific
  correction factor ``E[wgt(r)] / E[r psi(r)]``.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from pylmrob.control import Control
    from pylmrob.results import LmRobResults


# R's summary.lmrob: tabulated correc factor for default tunings of each family.
# Values copied verbatim from robustbase R/lmrob.MM.R::summary.lmrob.
_R_SQUARED_CORREC_TABLE: dict[str, float] = {
    "bisquare": 1.207617,
    "biweight": 1.207617,
    "welsh": 1.224617,
    "optimal": 1.068939,
    "hampel": 1.166891,
    "lqq": 1.159232,
}


def _ggw_correc(tuning: tuple[float, ...]) -> float | None:
    """R's summary.lmrob ggw special-case: four hand-tabulated tunings, else None."""
    if len(tuning) != 4:
        return None
    table = {
        (-0.5, 1.0, 0.95): 1.121708,
        (-0.5, 1.5, 0.95): 1.163192,
        (-0.5, 1.0, 0.85): 1.33517,
        (-0.5, 1.5, 0.85): 1.395828,
    }
    key = (float(tuning[0]), float(tuning[1]), float(tuning[2]))
    return table.get(key)


def _numeric_correc(psi_family: str, tuning: tuple[float, ...]) -> float:
    """Generic ``E[wgt(r)] / E[r psi(r)]`` under the standard normal."""
    from scipy import integrate

    from pylmrob.psi import psi as _psi
    from pylmrob.psi import wgt as _wgt

    inv_sqrt2pi = 1.0 / np.sqrt(2.0 * np.pi)

    def num(t: float) -> float:
        return (
            float(_wgt(np.array([t]), psi_family, tuning)[0]) * np.exp(-0.5 * t * t) * inv_sqrt2pi
        )

    def den(t: float) -> float:
        psi_val = float(_psi(np.array([t]), psi_family, tuning)[0])
        return t * psi_val * np.exp(-0.5 * t * t) * inv_sqrt2pi

    enum, _ = integrate.quad(num, -20.0, 20.0, epsabs=1e-10)
    eden, _ = integrate.quad(den, -20.0, 20.0, epsabs=1e-10)
    if eden <= 0:
        return 1.0
    return enum / eden


def _is_default_tuning(psi_family: str, tuning: tuple[float, ...]) -> bool:
    from pylmrob.psi import _PSI_TUNING_DEFAULT_PSI

    default = _PSI_TUNING_DEFAULT_PSI.get(psi_family.lower())
    if default is None or len(default) != len(tuning):
        return False
    return all(abs(a - b) < 1e-10 for a, b in zip(default, tuning, strict=True))


def _correc_factor(psi_family: str, tuning: tuple[float, ...]) -> float:
    fam = psi_family.lower()
    if fam == "ggw":
        # R uses a literal-tuning lookup for ggw, independent of our default-table check.
        ggw = _ggw_correc(tuning)
        if ggw is not None:
            return ggw
        return _numeric_correc(psi_family, tuning)
    if fam in _R_SQUARED_CORREC_TABLE and _is_default_tuning(psi_family, tuning):
        return _R_SQUARED_CORREC_TABLE[fam]
    return _numeric_correc(psi_family, tuning)



[docs]
@dataclass
class SummaryLmRob:
    """``summary.lmrob`` analogue. Stringifies to an R-style printout."""

    coefficients: np.ndarray
    term_names: list[str]
    scale: float
    r_squared: float
    adj_r_squared: float
    df_residual: int
    nobs: int
    residuals: np.ndarray
    rweights: np.ndarray
    cov: np.ndarray
    converged: bool
    n_iter: int
    control: Control
    has_intercept: bool

    style: str = "r"  # "r" (default) or "statsmodels"
    # ``detail="brief"`` (default) matches the historic output. ``"full"``
    # appends a small footer with init method, init scale, init iter count,
    # MM iter count, and the engine/RNG that drove the fit; useful for
    # debugging convergence issues without ``trace_lev=3``.
    detail: str = "brief"
    init_info: dict[str, object] | None = None
    engine_c: bool | None = None
    rng: str | None = None

    def __str__(self) -> str:
        return self.render(style=self.style, detail=self.detail)


[docs]
    def render(self, *, style: str = "r", detail: str = "brief") -> str:
        """Render the summary table.

        Parameters
        ----------
        style
            ``"r"`` (default): R-style ``summary.lmrob`` output.
            ``"statsmodels"``: a fixed-width table matching the
            ``statsmodels.iolib.summary.Summary`` layout for users who
            pipe pylmrob fits into statsmodels-shaped reporting code.
        detail
            ``"brief"`` (default) emits the standard summary. ``"full"``
            appends a footer with the init method, init scale, MM iter
            count, and engine settings (engine_c, rng). Use this when
            debugging convergence.
        """
        if detail not in ("brief", "full"):
            raise ValueError(f"detail must be 'brief' or 'full', got {detail!r}")
        if style == "statsmodels":
            text = self._render_statsmodels()
        elif style == "r":
            text = self._render_r()
        else:
            raise ValueError(f"style must be 'r' or 'statsmodels', got {style!r}")
        if detail == "full":
            text = text + "\n\n" + self._render_full_footer()
        return text


    def _render_full_footer(self) -> str:
        """Diagnostic footer appended when ``detail='full'``."""
        rows: list[str] = ["--- Fit details ---"]
        if self.init_info is not None:
            method = self.init_info.get("method", "?")
            iscale = self.init_info.get("scale", float("nan"))
            iiter = self.init_info.get("n_iter", "?")
            rows.append(f"  init method      : {method}")
            try:
                rows.append(f"  init scale       : {float(iscale):.6g}")  # ty: ignore[invalid-argument-type]
            except (TypeError, ValueError):
                rows.append(f"  init scale       : {iscale}")
            rows.append(f"  init n_iter      : {iiter}")
        rows.append(f"  MM n_iter        : {self.n_iter}")
        rows.append(f"  scale (M-est)    : {self.scale:.6g}")
        if self.engine_c is not None:
            rows.append(f"  engine_c         : {self.engine_c}")
        if self.rng is not None:
            rows.append(f"  rng              : {self.rng}")
        rows.append(f"  psi family       : {self.control.psi or '?'}")
        if self.control.tuning_psi is not None:
            rows.append(f"  tuning_psi       : {self.control.tuning_psi}")
        if self.control.tuning_chi is not None:
            rows.append(f"  tuning_chi       : {self.control.tuning_chi}")
        return "\n".join(rows)

    def _render_r(self) -> str:
        rows: list[str] = []
        rows.append(
            f"lmrob(method={self.control.method!r}, psi={self.control.psi!r}, "
            f"setting={self.control.setting!r})"
        )
        rows.append("")
        rows.append("Residuals:")
        if self.df_residual > 5:
            qs = np.quantile(self.residuals, [0.0, 0.25, 0.5, 0.75, 1.0])
            qline = "    Min      1Q  Median      3Q     Max"
            vline = "  ".join(f"{q:7.4g}" for q in qs)
            rows.append(qline)
            rows.append(vline)
        else:
            rows.append("  ".join(f"{r:7.4g}" for r in self.residuals))
        rows.append("")
        rows.append("Coefficients:")
        header = f"{'':<20} {'Estimate':>11} {'Std. Error':>12} {'t value':>9} {'Pr(>|t|)':>10}"
        rows.append(header)
        for name, row in zip(self.term_names, self.coefficients, strict=True):
            est, se, tv, pv = row
            rows.append(f"{name:<20} {est:11.5g} {se:12.5g} {tv:9.4g} {pv:10.4g}")
        rows.append("")
        rows.append(f"Robust residual standard error: {self.scale:.6g}")
        if self.has_intercept:
            rows.append(
                f"Multiple R-squared: {self.r_squared:.6g},  "
                f"Adjusted R-squared: {self.adj_r_squared:.6g}"
            )
        rows.append(
            f"Convergence in {self.n_iter} IRWLS iterations"
            if self.converged
            else "Algorithm did not converge"
        )
        rows.append(f"n = {self.nobs}, df residual = {self.df_residual}")
        return "\n".join(rows)

    def _render_statsmodels(self) -> str:
        """statsmodels-style fixed-width table."""
        from scipy.stats import norm

        z = norm.ppf(0.975)
        bar = "=" * 78
        sep = "-" * 78
        rows: list[str] = []
        rows.append(bar)
        rows.append(f"{'lmrob (MM-estimator)':^78}")
        rows.append(bar)
        rows.append(
            f"{'Method:':<20}{self.control.method or '?':<19}"
            f"{'Psi:':<20}{self.control.psi or '?':<19}"
        )
        rows.append(
            f"{'No. Observations:':<20}{self.nobs:<19}{'Df Residuals:':<20}{self.df_residual:<19}"
        )
        rows.append(
            f"{'R-squared (robust):':<20}{self.r_squared:<19.4f}"
            f"{'Adj. R-squared:':<20}{self.adj_r_squared:<19.4f}"
        )
        rows.append(
            f"{'Robust scale:':<20}{self.scale:<19.6g}{'Converged:':<20}{self.converged!s:<19}"
        )
        rows.append(sep)
        rows.append(
            f"{'':<20}{'coef':>10}{'std err':>10}{'t':>8}{'P>|t|':>9}{'[0.025':>10}{'0.975]':>10}"
        )
        rows.append(sep)
        for name, row in zip(self.term_names, self.coefficients, strict=True):
            est, se, tv, pv = row
            lo = est - z * se
            hi = est + z * se
            rows.append(
                f"{name:<20}{est:>10.4f}{se:>10.4f}{tv:>8.3f}{pv:>9.4f}{lo:>10.4f}{hi:>10.4f}"
            )
        rows.append(bar)
        return "\n".join(rows)

    def __repr__(self) -> str:
        return self.__str__()

    def __contains__(self, item: object) -> bool:
        if not isinstance(item, str):
            return False
        return item in str(self)



def make_summary(result: LmRobResults) -> SummaryLmRob:
    """Build a ``SummaryLmRob`` from an ``LmRobResults``."""
    from scipy.stats import t as t_dist

    coef = np.asarray(result.coef_, dtype=np.float64)
    cov = np.asarray(result.cov_, dtype=np.float64)
    se = np.sqrt(np.maximum(np.diag(cov), 0.0))
    df = int(result.df_residual_)

    if result.converged_:
        with np.errstate(divide="ignore", invalid="ignore"):
            tval = np.where(se > 0, coef / np.where(se > 0, se, 1.0), np.nan)
            pval = 2.0 * t_dist.sf(np.abs(tval), df=df)
    else:
        tval = np.full_like(coef, np.nan)
        pval = np.full_like(coef, np.nan)

    coef_table = np.column_stack([coef, se, tval, pval])

    has_intercept = bool(
        result.term_names_ and result.term_names_[0] in {"Intercept", "(Intercept)"}
    )

    psi_family = result.control.psi or "bisquare"
    psi_k = tuple(np.atleast_1d(np.asarray(result.control.tuning_psi, dtype=float)).ravel())
    correc = _correc_factor(psi_family, psi_k)

    y = result.fitted_ + result.residuals_
    if has_intercept and df > 0:
        wsum = float(np.sum(result.rweights_))
        resp_mean = float(np.sum(result.rweights_ * y) / wsum) if wsum > 0 else 0.0
        yMy = float(np.sum(result.rweights_ * (y - resp_mean) ** 2))
        rMr = float(np.sum(result.rweights_ * result.residuals_**2))
        denom = yMy + rMr * (correc - 1.0)
        r2 = (yMy - rMr) / denom if denom > 0 else 0.0
        df_int = 1
        adj = 1.0 - (1.0 - r2) * ((result.nobs_ - df_int) / df) if df > 0 else 0.0
    else:
        r2 = 0.0
        adj = 0.0

    return SummaryLmRob(
        coefficients=coef_table,
        term_names=list(result.term_names_),
        scale=float(result.scale_),
        r_squared=float(r2),
        adj_r_squared=float(adj),
        df_residual=df,
        nobs=int(result.nobs_),
        residuals=np.asarray(result.residuals_, dtype=np.float64),
        rweights=np.asarray(result.rweights_, dtype=np.float64),
        cov=cov,
        converged=bool(result.converged_),
        n_iter=int(result.n_iter_),
        control=result.control,
        has_intercept=has_intercept,
        init_info=dict(result.init_) if result.init_ else None,
        engine_c=bool(result.control.engine_c),
        rng=str(result.control.rng),
    )


__all__ = ["SummaryLmRob", "make_summary"]