Source code for pylmrob.summary

# SPDX-License-Identifier: GPL-3.0-or-later
"""``summary.lmrob``-equivalent output for ``LmRobResults``.

Mirrors ``robustbase:::summary.lmrob`` and ``robustbase:::print.summary.lmrob``:

- Coefficient table with ``Estimate``, ``Std. Error``, ``t value``, ``Pr(>|t|)``
  using the t-distribution with ``df.residual`` degrees of freedom.
- Robust residual standard error = ``scale``.
- Robust multiple R-squared and adjusted R-squared computed from the rweights-
  weighted total and residual sum-of-squares with the family-specific
  correction factor ``E[wgt(r)] / E[r psi(r)]``.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from pylmrob.control import Control
    from pylmrob.results import LmRobResults


# R's summary.lmrob: tabulated correc factor for default tunings of each family.
# Values copied verbatim from robustbase R/lmrob.MM.R::summary.lmrob.
_R_SQUARED_CORREC_TABLE: dict[str, float] = {
    "bisquare": 1.207617,
    "biweight": 1.207617,
    "welsh": 1.224617,
    "optimal": 1.068939,
    "hampel": 1.166891,
    "lqq": 1.159232,
}


def _ggw_correc(tuning: tuple[float, ...]) -> float | None:
    """R's summary.lmrob ggw special-case: four hand-tabulated tunings, else None."""
    if len(tuning) != 4:
        return None
    table = {
        (-0.5, 1.0, 0.95): 1.121708,
        (-0.5, 1.5, 0.95): 1.163192,
        (-0.5, 1.0, 0.85): 1.33517,
        (-0.5, 1.5, 0.85): 1.395828,
    }
    key = (float(tuning[0]), float(tuning[1]), float(tuning[2]))
    return table.get(key)


def _numeric_correc(psi_family: str, tuning: tuple[float, ...]) -> float:
    """Generic ``E[wgt(r)] / E[r psi(r)]`` under the standard normal."""
    from scipy import integrate

    from pylmrob.psi import psi as _psi
    from pylmrob.psi import wgt as _wgt

    inv_sqrt2pi = 1.0 / np.sqrt(2.0 * np.pi)

    def num(t: float) -> float:
        return (
            float(_wgt(np.array([t]), psi_family, tuning)[0]) * np.exp(-0.5 * t * t) * inv_sqrt2pi
        )

    def den(t: float) -> float:
        psi_val = float(_psi(np.array([t]), psi_family, tuning)[0])
        return t * psi_val * np.exp(-0.5 * t * t) * inv_sqrt2pi

    enum, _ = integrate.quad(num, -20.0, 20.0, epsabs=1e-10)
    eden, _ = integrate.quad(den, -20.0, 20.0, epsabs=1e-10)
    if eden <= 0:
        return 1.0
    return enum / eden


def _is_default_tuning(psi_family: str, tuning: tuple[float, ...]) -> bool:
    from pylmrob.psi import _PSI_TUNING_DEFAULT_PSI

    default = _PSI_TUNING_DEFAULT_PSI.get(psi_family.lower())
    if default is None or len(default) != len(tuning):
        return False
    return all(abs(a - b) < 1e-10 for a, b in zip(default, tuning, strict=True))


def _correc_factor(psi_family: str, tuning: tuple[float, ...]) -> float:
    fam = psi_family.lower()
    if fam == "ggw":
        # R uses a literal-tuning lookup for ggw, independent of our default-table check.
        ggw = _ggw_correc(tuning)
        if ggw is not None:
            return ggw
        return _numeric_correc(psi_family, tuning)
    if fam in _R_SQUARED_CORREC_TABLE and _is_default_tuning(psi_family, tuning):
        return _R_SQUARED_CORREC_TABLE[fam]
    return _numeric_correc(psi_family, tuning)


[docs] @dataclass class SummaryLmRob: """``summary.lmrob`` analogue. Stringifies to an R-style printout.""" coefficients: np.ndarray term_names: list[str] scale: float r_squared: float adj_r_squared: float df_residual: int nobs: int residuals: np.ndarray rweights: np.ndarray cov: np.ndarray converged: bool n_iter: int control: Control has_intercept: bool style: str = "r" # "r" (default) or "statsmodels" # ``detail="brief"`` (default) matches the historic output. ``"full"`` # appends a small footer with init method, init scale, init iter count, # MM iter count, and the engine/RNG that drove the fit; useful for # debugging convergence issues without ``trace_lev=3``. detail: str = "brief" init_info: dict[str, object] | None = None engine_c: bool | None = None rng: str | None = None def __str__(self) -> str: return self.render(style=self.style, detail=self.detail)
[docs] def render(self, *, style: str = "r", detail: str = "brief") -> str: """Render the summary table. Parameters ---------- style ``"r"`` (default): R-style ``summary.lmrob`` output. ``"statsmodels"``: a fixed-width table matching the ``statsmodels.iolib.summary.Summary`` layout for users who pipe pylmrob fits into statsmodels-shaped reporting code. detail ``"brief"`` (default) emits the standard summary. ``"full"`` appends a footer with the init method, init scale, MM iter count, and engine settings (engine_c, rng). Use this when debugging convergence. """ if detail not in ("brief", "full"): raise ValueError(f"detail must be 'brief' or 'full', got {detail!r}") if style == "statsmodels": text = self._render_statsmodels() elif style == "r": text = self._render_r() else: raise ValueError(f"style must be 'r' or 'statsmodels', got {style!r}") if detail == "full": text = text + "\n\n" + self._render_full_footer() return text
def _render_full_footer(self) -> str: """Diagnostic footer appended when ``detail='full'``.""" rows: list[str] = ["--- Fit details ---"] if self.init_info is not None: method = self.init_info.get("method", "?") iscale = self.init_info.get("scale", float("nan")) iiter = self.init_info.get("n_iter", "?") rows.append(f" init method : {method}") try: rows.append(f" init scale : {float(iscale):.6g}") # ty: ignore[invalid-argument-type] except (TypeError, ValueError): rows.append(f" init scale : {iscale}") rows.append(f" init n_iter : {iiter}") rows.append(f" MM n_iter : {self.n_iter}") rows.append(f" scale (M-est) : {self.scale:.6g}") if self.engine_c is not None: rows.append(f" engine_c : {self.engine_c}") if self.rng is not None: rows.append(f" rng : {self.rng}") rows.append(f" psi family : {self.control.psi or '?'}") if self.control.tuning_psi is not None: rows.append(f" tuning_psi : {self.control.tuning_psi}") if self.control.tuning_chi is not None: rows.append(f" tuning_chi : {self.control.tuning_chi}") return "\n".join(rows) def _render_r(self) -> str: rows: list[str] = [] rows.append( f"lmrob(method={self.control.method!r}, psi={self.control.psi!r}, " f"setting={self.control.setting!r})" ) rows.append("") rows.append("Residuals:") if self.df_residual > 5: qs = np.quantile(self.residuals, [0.0, 0.25, 0.5, 0.75, 1.0]) qline = " Min 1Q Median 3Q Max" vline = " ".join(f"{q:7.4g}" for q in qs) rows.append(qline) rows.append(vline) else: rows.append(" ".join(f"{r:7.4g}" for r in self.residuals)) rows.append("") rows.append("Coefficients:") header = f"{'':<20} {'Estimate':>11} {'Std. Error':>12} {'t value':>9} {'Pr(>|t|)':>10}" rows.append(header) for name, row in zip(self.term_names, self.coefficients, strict=True): est, se, tv, pv = row rows.append(f"{name:<20} {est:11.5g} {se:12.5g} {tv:9.4g} {pv:10.4g}") rows.append("") rows.append(f"Robust residual standard error: {self.scale:.6g}") if self.has_intercept: rows.append( f"Multiple R-squared: {self.r_squared:.6g}, " f"Adjusted R-squared: {self.adj_r_squared:.6g}" ) rows.append( f"Convergence in {self.n_iter} IRWLS iterations" if self.converged else "Algorithm did not converge" ) rows.append(f"n = {self.nobs}, df residual = {self.df_residual}") return "\n".join(rows) def _render_statsmodels(self) -> str: """statsmodels-style fixed-width table.""" from scipy.stats import norm z = norm.ppf(0.975) bar = "=" * 78 sep = "-" * 78 rows: list[str] = [] rows.append(bar) rows.append(f"{'lmrob (MM-estimator)':^78}") rows.append(bar) rows.append( f"{'Method:':<20}{self.control.method or '?':<19}" f"{'Psi:':<20}{self.control.psi or '?':<19}" ) rows.append( f"{'No. Observations:':<20}{self.nobs:<19}{'Df Residuals:':<20}{self.df_residual:<19}" ) rows.append( f"{'R-squared (robust):':<20}{self.r_squared:<19.4f}" f"{'Adj. R-squared:':<20}{self.adj_r_squared:<19.4f}" ) rows.append( f"{'Robust scale:':<20}{self.scale:<19.6g}{'Converged:':<20}{self.converged!s:<19}" ) rows.append(sep) rows.append( f"{'':<20}{'coef':>10}{'std err':>10}{'t':>8}{'P>|t|':>9}{'[0.025':>10}{'0.975]':>10}" ) rows.append(sep) for name, row in zip(self.term_names, self.coefficients, strict=True): est, se, tv, pv = row lo = est - z * se hi = est + z * se rows.append( f"{name:<20}{est:>10.4f}{se:>10.4f}{tv:>8.3f}{pv:>9.4f}{lo:>10.4f}{hi:>10.4f}" ) rows.append(bar) return "\n".join(rows) def __repr__(self) -> str: return self.__str__() def __contains__(self, item: object) -> bool: if not isinstance(item, str): return False return item in str(self)
def make_summary(result: LmRobResults) -> SummaryLmRob: """Build a ``SummaryLmRob`` from an ``LmRobResults``.""" from scipy.stats import t as t_dist coef = np.asarray(result.coef_, dtype=np.float64) cov = np.asarray(result.cov_, dtype=np.float64) se = np.sqrt(np.maximum(np.diag(cov), 0.0)) df = int(result.df_residual_) if result.converged_: with np.errstate(divide="ignore", invalid="ignore"): tval = np.where(se > 0, coef / np.where(se > 0, se, 1.0), np.nan) pval = 2.0 * t_dist.sf(np.abs(tval), df=df) else: tval = np.full_like(coef, np.nan) pval = np.full_like(coef, np.nan) coef_table = np.column_stack([coef, se, tval, pval]) has_intercept = bool( result.term_names_ and result.term_names_[0] in {"Intercept", "(Intercept)"} ) psi_family = result.control.psi or "bisquare" psi_k = tuple(np.atleast_1d(np.asarray(result.control.tuning_psi, dtype=float)).ravel()) correc = _correc_factor(psi_family, psi_k) y = result.fitted_ + result.residuals_ if has_intercept and df > 0: wsum = float(np.sum(result.rweights_)) resp_mean = float(np.sum(result.rweights_ * y) / wsum) if wsum > 0 else 0.0 yMy = float(np.sum(result.rweights_ * (y - resp_mean) ** 2)) rMr = float(np.sum(result.rweights_ * result.residuals_**2)) denom = yMy + rMr * (correc - 1.0) r2 = (yMy - rMr) / denom if denom > 0 else 0.0 df_int = 1 adj = 1.0 - (1.0 - r2) * ((result.nobs_ - df_int) / df) if df > 0 else 0.0 else: r2 = 0.0 adj = 0.0 return SummaryLmRob( coefficients=coef_table, term_names=list(result.term_names_), scale=float(result.scale_), r_squared=float(r2), adj_r_squared=float(adj), df_residual=df, nobs=int(result.nobs_), residuals=np.asarray(result.residuals_, dtype=np.float64), rweights=np.asarray(result.rweights_, dtype=np.float64), cov=cov, converged=bool(result.converged_), n_iter=int(result.n_iter_), control=result.control, has_intercept=has_intercept, init_info=dict(result.init_) if result.init_ else None, engine_c=bool(result.control.engine_c), rng=str(result.control.rng), ) __all__ = ["SummaryLmRob", "make_summary"]