################################################################ # This lmnh.R file defines an lme object, which is like an lm # object, except that it has a more detailed summary method. # In particular, it also provides normalized coefficients (the n) and # White heteroskedasticity (the h). # # Noone who is a real statistician and R expert will find this # useful. It is written for casual users and R amateurs. # Indeed, nothing in this source file is original. everything # was stolen from the excellent r-help mailing list. # # Synopsis: # # y=rnorm(10); x=rnorm(10); m=lme(y ~ x); summary(m); # ################################################################ library(car); # load the heteroskedasticity function hccm lmnh <- function(...) { lm.object= lm(...); class(lm.object) <- "lmnh"; return (lm.object); } print.lmnh <- function(...) { x=print.lm(...) cat("\n[Note: this lmnh class has a more detailed summary method than the lm class]\n"); return(x); } # the following two functions were never used, but seem like a good idea as.lm <- function( lmnh.object ) { stopifnot( class(lmnh.object) == "lmnh" ); # wouldn't it be nice if this allowed a specific warning error? copy=lmnh.object class(copy) <- "lm"; return(copy); } as.lmnh <- function( lm.object ) { stopifnot( class(lm.object) == "lm" ); # wouldn't it be nice if this allowed a specific warning error? copy=lm.object class(copy) <- "lmnh"; return(copy); } ################################################################ # this method is invoked when you do "ms= summary(m)" # # small appeal to the R authors: define some basic hooks in the # lm summary functions themselves (e.g., add.coefstatistics and # add.finalstatistics). ################################################################ summary.lmnh <- function (object, correlation = FALSE, symbolic.cor = FALSE, ...) { # CHANGE #1: colnames were expanded by NormEst, t-htsk (heteroskedasticity), and Pr(>|th|) colnames= c("Estimate", "NormEst", "Std. Error", "t-ols", "Pr(>|t|)", "t-htsk", "Pr(>|th|)"); z <- object p <- z$rank if (p == 0) { r <- z$residuals n <- length(r) w <- z$weights if (is.null(w)) { rss <- sum(r^2) } else { rss <- sum(w * r^2) r <- sqrt(w) * r } resvar <- rss/(n - p) ans <- z[c("call", "terms")] class(ans) <- "summary.lm" ans$aliased <- is.na(coef(object)) ans$residuals <- r ans$df <- c(0, n, length(ans$aliased)) ans$coefficients <- matrix(NA, 0, length(colnames)) # CHANGE #2: 4 has become 7 dimnames(ans$coefficients) <- list(NULL, colnames) ans$sigma <- sqrt(resvar) ans$r.squared <- ans$adj.r.squared <- 0 return(ans) } Qr <- object$qr if (is.null(z$terms) || is.null(Qr)) stop("invalid 'lm' object: no 'terms' nor 'qr' component") n <- NROW(Qr$qr) rdf <- n - p if (is.na(z$df.residual) || rdf != z$df.residual) warning("residual degrees of freedom in object suggest this is not an \"lm\" fit") p1 <- 1:p r <- z$residuals f <- z$fitted w <- z$weights if (is.null(w)) { mss <- if (attr(z$terms, "intercept")) sum((f - mean(f))^2) else sum(f^2) rss <- sum(r^2) } else { mss <- if (attr(z$terms, "intercept")) { m <- sum(w * f/sum(w)) sum(w * (f - m)^2) } else sum(w * f^2) rss <- sum(w * r^2) r <- sqrt(w) * r } resvar <- rss/rdf R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) se <- sqrt(diag(R) * resvar) est <- z$coefficients[Qr$pivot[p1]] tval <- est/se ################################################################ # CHANGE #3: here come my new columns: normalized coefficients # and white heteroskedasticity adjustd coefficients ################################################################ normalized.coefs.lm <- function(lmnh.obj) { ## We do not want to include the intercept. coefs <- coef(lmnh.obj)[-1] m <- model.frame(lmnh.obj) # get the data used for the fit. SD <- apply(m, 2, sd) normCoefs <- coefs * SD[-1] / SD[1] normCoefs <- c(0, normCoefs); # add a 0 back for the normalized intercept; invisible(normCoefs) } est.normed <- normalized.coefs.lm( object ); # this may not work; it may ignore pivots; not sure # now let's add on white heteroskedasticity adjusted estimates cheat.object= object; class(cheat.object) <- "lm"; se.w.step1= try(hccm( cheat.object , type="hc0"), TRUE); if(inherits(se.w.step1, "try-error")) { cat("sorry, but the hccm step failed\n"); tval.white <- rep( NA, length(est) ); } else { se.white= sqrt(diag(se.w.step1)); tval.white= est/ se.white; } ################################################################ # No more changes ################################################################ ans <- z[c("call", "terms")] ans$residuals <- r ans$coefficients <- cbind(est, est.normed, se, tval, 2 * pt( abs(tval), rdf, lower.tail = FALSE), tval.white, 2 * pt(abs(tval.white), rdf, lower.tail = FALSE)) dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]], colnames) ans$aliased <- is.na(coef(object)) ans$sigma <- sqrt(resvar) ans$df <- c(p, rdf, NCOL(Qr$qr)) if (p != attr(z$terms, "intercept")) { df.int <- if (attr(z$terms, "intercept")) 1 else 0 ans$r.squared <- mss/(mss + rss) ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - df.int)/rdf) ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, numdf = p - df.int, dendf = rdf) } else ans$r.squared <- ans$adj.r.squared <- 0 ans$cov.unscaled <- R dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,1)] if (correlation) { ans$correlation <- (R * resvar)/outer(se, se) dimnames(ans$correlation) <- dimnames(ans$cov.unscaled) ans$symbolic.cor <- symbolic.cor } if (!is.null(z$na.action)) ans$na.action <- z$na.action class(ans) <- "summary.lmnh" ans } ######################################################################## # # this function is invoked by R when you do (after ms=summary(m)) a print $ms # similarly, if you do summary( lmnh( y ~ x ) ) will first invoke the # summary.lmnh function and then the print.summary.lmnh function. In my case, # I don't need anything new---except the darn rounding needs to be different. # #################################################################### print.summary.lmnh <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) { cat("\nCall:\n") cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") resid <- x$residuals df <- x$df rdf <- df[2] cat(if (!is.null(x$w) && diff(range(x$w))) "Weighted ", "Residuals:\n", sep = "") if (rdf > 5) { nam <- c("Min", "1Q", "Median", "3Q", "Max") rq <- if (length(dim(resid)) == 2) structure(apply(t(resid), 1, quantile), dimnames = list(nam, dimnames(resid)[[2]])) else structure(quantile(resid), names = nam) print(rq, digits = digits, ...) } else if (rdf > 0) { print(resid, digits = digits, ...) } else { cat("ALL", df[1], "residuals are 0: no residual degrees of freedom!\n") } if (length(x$aliased) == 0) { cat("\nNo Coefficients\n") } else { if (nsingular <- df[3] - df[1]) cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", sep = "") else cat("\nCoefficients:\n") coefs <- x$coefficients if (!is.null(aliased <- x$aliased) && any(aliased)) { cn <- names(aliased) coefs <- matrix(NA, length(aliased), 4, dimnames = list(cn, colnames(coefs))) coefs[!aliased, ] <- x$coefficients } printCoefmat.lmnh(coefs) ### needed to be changed } cat("\nResidual standard error:", format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") if (nchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep = "") if (!is.null(x$fstatistic)) { cat("Multiple R-Squared:", formatC(x$r.squared, digits = digits)) cat(",\tAdjusted R-squared:", formatC(x$adj.r.squared, digits = digits), "\nF-statistic:", formatC(x$fstatistic[1], digits = digits), "on", x$fstatistic[2], "and", x$fstatistic[3], "DF, p-value:", format.pval(pf(x$fstatistic[1], x$fstatistic[2], x$fstatistic[3], lower.tail = FALSE), digits = digits), "\n") } correl <- x$correlation if (!is.null(correl)) { p <- NCOL(correl) if (p > 1) { cat("\nCorrelation of Coefficients:\n") if (is.logical(symbolic.cor) && symbolic.cor) { print(symnum(correl, abbr.col = NULL)) } else { correl <- format(round(correl, 2), nsmall = 2, digits = digits) correl[!lower.tri(correl)] <- "" print(correl[-1, -p, drop = FALSE], quote = FALSE) } } } cat("\n") invisible(x) } #### this is really bad. it is also barebones, and has lost all use of options printCoefmat.lmnh <- function (x) { if (is.null(d <- dim(x)) || length(d) != 2) stop("'x' must be coefficient matrix/data frame") # the header line cat( " "); cat( sprintf("%-10s", names(x[1,1:3])) ); cat( " " ); cat( sprintf("%6s ", names(x[1,4:7])) ); cat("\n"); # the coefficient lines for (i in 1: nrow(x)) { cat( sprintf("%-14s", names(x[,1])[i]) ); cat( sprintf("%10.6f ", c( x[i,1],x[i,2],x[i,3] )), sep=" " ); # est, normest, stderr cat( sprintf("%7.3f", c( x[i,4] )), " " ); # T cat( sprintf("%7.3f", c( x[i,5] )), " " ); # Pr cat( sprintf("%7.3f", c( x[i,6] )), " " ); # T cat( sprintf("%7.3f", c( x[i,7] )), " " ); # Pr cat("\n"); } return(invisible(x)); } ################################################################ DEMONSTRATE= 1; # set this to 0 afterwards if (DEMONSTRATE) { N= 50; cat("\n\n> y= rnorm(15, sd=20); x= rnorm(15, sd=5); z= rnorm(15, sd=10);\n"); y= rnorm(N, sd=20); x= rnorm(N, sd=5); z= rnorm(N, sd=10); cat("\n> model= lmnh( y ~ x + z );\n"); model= lmnh( y ~ x + z ); cat("\n> print(model)\n"); print(model); cat("\n\n> print(summary(model))\n"); print(summary(model)); } cat("[loaded the lmnh.R class for more lm statistics (normalized and heteroskedasticity)]\n");