In [1]: import numpy as np
In [1]: from scipy import stats
In [1]: import statsmodels.api as sm
In [1]: import matplotlib.pyplot as plt
In [1]: from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [1]: from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
In [1]: np.random.seed(1024)
Model assumptions:
In [1]: nsample = 50
In [1]: x = np.linspace(0, 20, nsample)
In [1]: X = np.c_[x, (x - 5)**2, np.ones(nsample)]
In [1]: beta = [0.5, -0.01, 5.]
In [1]: sig = 0.5
In [1]: w = np.ones(nsample)
In [1]: w[nsample * 6 / 10:] = 3
In [1]: y_true = np.dot(X, beta)
In [1]: e = np.random.normal(size=nsample)
In [1]: y = y_true + sig * w * e
In [1]: X = X[:, [0, 2]]
In [1]: mod_wls = sm.WLS(y, X, weights=1. / w)
In [1]: res_wls = mod_wls.fit()
In [1]: print res_wls.summary()
Estimate an OLS model for comparison
In [1]: res_ols = sm.OLS(y, X).fit()
Compare the estimated parameters in WLS and OLS
In [1]: print res_ols.params
In [1]: print res_wls.params
Compare the WLS standard errors to heteroscedasticity corrected OLS standard errors:
In [1]: se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se],
...: [res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])
...:
In [1]: se = np.round(se, 4)
In [1]: colnames = 'x1', 'const'
In [1]: rownames = 'WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC3', 'OLS_HC3'
In [1]: tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
In [1]: print tabl
Calculate OLS prediction interval
In [1]: covb = res_ols.cov_params()
In [1]: prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)
In [1]: prediction_std = np.sqrt(prediction_var)
In [1]: tppf = stats.t.ppf(0.975, res_ols.df_resid)
Draw a plot to compare predicted values in WLS and OLS:
In [1]: prstd, iv_l, iv_u = wls_prediction_std(res_wls)
In [1]: plt.figure();
In [1]: plt.plot(x, y, 'o', x, y_true, 'b-');
In [1]: plt.plot(x, res_ols.fittedvalues, 'r--');
In [1]: plt.plot(x, res_ols.fittedvalues + tppf * prediction_std, 'r--');
In [1]: plt.plot(x, res_ols.fittedvalues - tppf * prediction_std, 'r--');
In [1]: plt.plot(x, res_wls.fittedvalues, 'g--.');
In [1]: plt.plot(x, iv_u, 'g--');
In [1]: plt.plot(x, iv_l, 'g--');
In [1]: plt.title('blue: true, red: OLS, green: WLS');
In [1]: resid1 = res_ols.resid[w == 1.]
In [1]: var1 = resid1.var(ddof=int(res_ols.df_model) + 1)
In [1]: resid2 = res_ols.resid[w != 1.]
In [1]: var2 = resid2.var(ddof=int(res_ols.df_model) + 1)
In [1]: w_est = w.copy()
In [1]: w_est[w != 1.] = np.sqrt(var2) / np.sqrt(var1)
In [1]: res_fwls = sm.WLS(y, X, 1. / w_est).fit()
In [1]: print res_fwls.summary()