Prediction (out of sample)
==========================


.. _predict_notebook:

`Link to Notebook GitHub <https://github.com/statsmodels/statsmodels/blob/master/examples/notebooks/predict.ipynb>`_

.. raw:: html

   
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[1]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="kn">from</span> <span class="nn">__future__</span> <span class="kn">import</span> <span class="n">print_function</span>
   <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
   <span class="kn">import</span> <span class="nn">statsmodels.api</span> <span class="kn">as</span> <span class="nn">sm</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="artificial-data">Artificial data</h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[2]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">nsample</span> <span class="o">=</span> <span class="mi">50</span>
   <span class="n">sig</span> <span class="o">=</span> <span class="mf">0.25</span>
   <span class="n">x1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="n">nsample</span><span class="p">)</span>
   <span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">column_stack</span><span class="p">((</span><span class="n">x1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x1</span><span class="p">),</span> <span class="p">(</span><span class="n">x1</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span>
   <span class="n">X</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">add_constant</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
   <span class="n">beta</span> <span class="o">=</span> <span class="p">[</span><span class="mf">5.</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.02</span><span class="p">]</span>
   <span class="n">y_true</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">beta</span><span class="p">)</span>
   <span class="n">y</span> <span class="o">=</span> <span class="n">y_true</span> <span class="o">+</span> <span class="n">sig</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="n">nsample</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="estimation">Estimation</h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[3]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">olsmod</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">OLS</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">X</span><span class="p">)</span>
   <span class="n">olsres</span> <span class="o">=</span> <span class="n">olsmod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
   <span class="k">print</span><span class="p">(</span><span class="n">olsres</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>
                               OLS Regression Results                            
   ==============================================================================
   Dep. Variable:                      y   R-squared:                       0.983
   Model:                            OLS   Adj. R-squared:                  0.981
   Method:                 Least Squares   F-statistic:                     867.5
   Date:                Tue, 25 Aug 2015   Prob (F-statistic):           1.78e-40
   Time:                        00:45:27   Log-Likelihood:                 1.0243
   No. Observations:                  50   AIC:                             5.951
   Df Residuals:                      46   BIC:                             13.60
   Df Model:                           3                                         
   Covariance Type:            nonrobust                                         
   ==============================================================================
                    coef    std err          t      P&gt;|t|      [95.0% Conf. Int.]
   ------------------------------------------------------------------------------
   const          5.0830      0.084     60.339      0.000         4.913     5.253
   x1             0.4953      0.013     38.125      0.000         0.469     0.521
   x2             0.4219      0.051      8.260      0.000         0.319     0.525
   x3            -0.0208      0.001    -18.221      0.000        -0.023    -0.018
   ==============================================================================
   Omnibus:                        1.746   Durbin-Watson:                   1.889
   Prob(Omnibus):                  0.418   Jarque-Bera (JB):                0.934
   Skew:                          -0.255   Prob(JB):                        0.627
   Kurtosis:                       3.435   Cond. No.                         221.
   ==============================================================================
   
   Warnings:
   [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
   
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="in-sample-prediction">In-sample prediction</h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[4]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">ypred</span> <span class="o">=</span> <span class="n">olsres</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
   <span class="k">print</span><span class="p">(</span><span class="n">ypred</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>
   [  4.56341717   5.0144075    5.43096156   5.79008837   6.0770943
      6.28799715   6.43018041   6.52118022   6.58580416   6.6520555
      6.74653231   6.89005748   7.09425756   7.3596526    7.67557064
      8.02190114   8.37239905   8.69899681   8.97641578   9.18631944
      9.320326     9.38138557   9.3832954    9.34843307   9.3040799
      9.27793872   9.29358236   9.36658069   9.50194257   9.69329289
      9.92391996  10.16952124  10.40219613  10.59502999  10.72651615
     10.78408934  10.7661902   10.68252142  10.55245366  10.40184296
     10.2587827   10.14898794  10.09157047  10.09589886  10.16005915
     10.27116937  10.40749601  10.54202474  10.64689879  10.69799578]
   
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="create-a-new-sample-of-explanatory-variables-xnew-predict-and-plot">Create a new sample of explanatory variables Xnew, predict and plot</h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[5]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">x1n</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">20.5</span><span class="p">,</span><span class="mi">25</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
   <span class="n">Xnew</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">column_stack</span><span class="p">((</span><span class="n">x1n</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x1n</span><span class="p">),</span> <span class="p">(</span><span class="n">x1n</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span>
   <span class="n">Xnew</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">add_constant</span><span class="p">(</span><span class="n">Xnew</span><span class="p">)</span>
   <span class="n">ynewpred</span> <span class="o">=</span>  <span class="n">olsres</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">Xnew</span><span class="p">)</span> <span class="c"># predict out of sample</span>
   <span class="k">print</span><span class="p">(</span><span class="n">ynewpred</span><span class="p">)</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   <div class="output_subarea output_stream output_stdout output_text">
   <pre>
   [ 10.66407583  10.5168051   10.27272731   9.96954351   9.65688155
      9.38414547   9.18841962   9.08538917   9.06549979   9.09629691]
   
   </pre>
   </div>
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="plot-comparison">Plot comparison</h2>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[6]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
   
   <span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="s">&#39;o&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Data&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x1</span><span class="p">,</span> <span class="n">y_true</span><span class="p">,</span> <span class="s">&#39;b-&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;True&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">((</span><span class="n">x1</span><span class="p">,</span> <span class="n">x1n</span><span class="p">)),</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">((</span><span class="n">ypred</span><span class="p">,</span> <span class="n">ynewpred</span><span class="p">)),</span> <span class="s">&#39;r&#39;</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;OLS prediction&quot;</span><span class="p">)</span>
   <span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s">&quot;best&quot;</span><span class="p">);</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt"></div>
   
   
   <div class="output_text output_subarea ">
   <pre>
   &lt;matplotlib.figure.Figure at 0x7f90c6ad2890&gt;
   </pre>
   </div>
   
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <h2 id="Predicting-with-Formulas">Predicting with Formulas<a class="anchor-link" href="#Predicting-with-Formulas">&#182;</a></h2>
   </div>
   </div>
   </div>
   
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>Using formulas can make both estimation and prediction a lot easier</p>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[7]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="kn">from</span> <span class="nn">statsmodels.formula.api</span> <span class="kn">import</span> <span class="n">ols</span>
   
   <span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="s">&quot;x1&quot;</span> <span class="p">:</span> <span class="n">x1</span><span class="p">,</span> <span class="s">&quot;y&quot;</span> <span class="p">:</span> <span class="n">y</span><span class="p">}</span>
   
   <span class="n">res</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s">&quot;y ~ x1 + np.sin(x1) + I((x1-5)**2)&quot;</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>We use the <code>I</code> to indicate use of the Identity transform. Ie., we don&#39;t want any expansion magic from using <code>**2</code></p>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[8]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">res</span><span class="o">.</span><span class="n">params</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt output_prompt">
       Out[8]:</div>
   
   
   <div class="output_text output_subarea output_pyout">
   <pre>
   Intercept           5.083036
   x1                  0.495320
   np.sin(x1)          0.421859
   I((x1 - 5) ** 2)   -0.020785
   dtype: float64
   </pre>
   </div>
   
   </div>
   
   </div>
   </div>
   
   </div>
   <div class="cell border-box-sizing text_cell rendered">
   <div class="prompt input_prompt">
   </div>
   <div class="inner_cell">
   <div class="text_cell_render border-box-sizing rendered_html">
   <p>Now we only have to pass the single variable and we get the transformed right-hand side variables automatically</p>
   </div>
   </div>
   </div>
   <div class="cell border-box-sizing code_cell rendered">
   <div class="input">
   <div class="prompt input_prompt">
   In&nbsp;[9]:
   </div>
   <div class="inner_cell">
       <div class="input_area">
   <div class="highlight"><pre><span class="n">res</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">exog</span><span class="o">=</span><span class="nb">dict</span><span class="p">(</span><span class="n">x1</span><span class="o">=</span><span class="n">x1n</span><span class="p">))</span>
   </pre></div>
   
   </div>
   </div>
   </div>
   
   <div class="output_wrapper">
   <div class="output">
   
   
   <div class="output_area"><div class="prompt output_prompt">
       Out[9]:</div>
   
   
   <div class="output_text output_subarea output_pyout">
   <pre>
   array([ 10.66407583,  10.5168051 ,  10.27272731,   9.96954351,
            9.65688155,   9.38414547,   9.18841962,   9.08538917,
            9.06549979,   9.09629691])
   </pre>
   </div>
   
   </div>
   
   </div>
   </div>
   
   </div>

   <script src="https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"type="text/javascript"></script>
   <script type="text/javascript">
   init_mathjax = function() {
       if (window.MathJax) {
           // MathJax loaded
           MathJax.Hub.Config({
               tex2jax: {
               // I'm not sure about the \( and \[ below. It messes with the
               // prompt, and I think it's an issue with the template. -SS
                   inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                   displayMath: [ ['$$','$$'], ["\\[","\\]"] ]
               },
               displayAlign: 'left', // Change this to 'center' to center equations.
               "HTML-CSS": {
                   styles: {'.MathJax_Display': {"margin": 0}}
               }
           });
           MathJax.Hub.Queue(["Typeset",MathJax.Hub]);
       }
   }
   init_mathjax();

   // since we have to load this in a ..raw:: directive we will add the css
   // after the fact
   function loadcssfile(filename){
       var fileref=document.createElement("link")
       fileref.setAttribute("rel", "stylesheet")
       fileref.setAttribute("type", "text/css")
       fileref.setAttribute("href", filename)

       document.getElementsByTagName("head")[0].appendChild(fileref)
   }
   // loadcssfile({{pathto("_static/nbviewer.pygments.css", 1) }})
   // loadcssfile({{pathto("_static/nbviewer.min.css", 1) }})
   loadcssfile("../../../_static/nbviewer.pygments.css")
   loadcssfile("../../../_static/ipython.min.css")
   </script>