U
    md#                     @   s  d Z ddlmZ ddlZddlmZ d?ddZdd Z	d	d
 Z
dZdZdd Zdd Zdd ZedkrdZejjdedfddefdefdefdefgZejjedfddefdefgZddlZejjjeefdd Zeed!feZ ej!j"D ]Z#ee# e e#< qej!j"D ]Z#ee# e e#< qed"e \Z$Z"ed#e \Z$Z"e'd$d% e"D Z(e()d!d&ejjed  Z*e+e*e(, Z-e&e-j. e&eee-  e'd'd% ed(e"D Z(e()d!d&ejjed  Z*e+e*e(, Z-e&e-j. e&eee-  e!d)efd*efd+efd,efd-efd.efd/efd0efgZ/ej0d1e/d2dd3Z1e&d4d5d% e1j!j"D  e1j2e3Z4e45d6e6e1j!j"7d!Z8e1e8 ddf j9Z:e&e:j; e&e:j! ed7e:\Z<Z=e'd8d% ed9e=D Z>e:d0 Z?e+e?e>, Z@e&e@j. e&eee@  d:Ae1j!j"dd6 ZBed;e:\ZCZDe'd<d% ed9eDD ZEe:d0 ZFe+eFeE, ZGe&eGj. e&eeeG  eDD ]TZHe&d=eHf e'd>d% eeHeDD ZIe:d0 ZJe+eJeI, ZKe&eeeK  q|dS )@a   convenience functions for ANOVA type analysis with OLS

Note: statistical results of ANOVA are not checked, OLS is
checked but not whether the reported results are the ones used
in ANOVA

includes form2design for creating dummy variables

TODO:
 * ...
 *

    )lmapNFc                 C   s^   |   } t| }|r0| dddf |ktS | dddf |ktddddf S dS )z|convert array of categories to dummy variables
    by default drops dummy variable for last category
    uses ravel, 1d onlyN)Zravelnpuniqueastypeint)x	returnallgroups r   e/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/statsmodels/sandbox/regression/try_ols_anova.py
data2dummy   s
    
r   c                 C   sL   t tt|  }| |dddddf kdjtddddf S )zcreates product dummy variables from 2 columns of 2d array

    drops last dummy variable, but not from each category
    singular with simple dummy variable but not with constant

    quickly written, no safeguards

    Nr   )	r   r   r   tupletolistallTr   r   )r   r
   r   r   r   data2proddummy   s    r   c                 C   s.   |j dkr|dddf }t| dd}|| S )zcreate dummy continuous variable

    Parameters
    ----------
    x1 : 1d array
        label or group array
    x2 : 1d array (float)
        continuous variable

    Notes
    -----
    useful for group specific slope coefficients in regression
       NT)r	   )ndimr   )x1Zx2dummyr   r   r   data2groupcont.   s    
r   aW  
ANOVA statistics (model sum of squares excludes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i        %(ess)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
a]  
ANOVA statistics (model sum of squares includes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i      %(ssmwithmean)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
c                 C   sb   i }| | j ddddddddd	d
dg}|D ]}t| |||< q.| jj|d< | j| j |d< |S )zjupdate regression results dictionary with ANOVA specific statistics

    not checked for completeness
    Zdf_modelZdf_residZessssruncentered_tssZ	mse_modelZ	mse_residZ	mse_totalZfvalueZf_pvalueZrsquarednobsZssmwithmean)update__dict__getattrmodelr   r   r   )resadZ
anova_attrkeyr   r   r   	anovadict[   s    
    r"   c                 C   sh  i }g }|   D ]L}|dkr>t|jd |d< |d qd|kr^|| ||< || q|dd dkr| dd }t|| ||< || q|dd d	kr| dd  d
}ttj||d  ||d  f |d|< |d| q|dd dkrV| dd  d
}t	||d  ||d  |d|< |d| qt
dq||fS )a  convert string formula to data dictionary

    ss : str
     * I : add constant
     * varname : for simple varnames data is used as is
     * F:varname : create dummy variables for factor varname
     * P:varname1*varname2 : create product dummy variables for
       varnames
     * G:varname1*varname2 : create product between factor and
       continuous variable
    data : dict or structured array
       data set, access of variables by name as in dictionaries

    Returns
    -------
    vars : dictionary
        dictionary of variables with converted dummy variables
    names : list
        list of names, product (P:) and grouped continuous
        variables (G:) have name by joining individual names
        sorted according to input

    Examples
    --------
    >>> xx, n = form2design('I a F:b P:c*d G:c*f', testdata)
    >>> xx.keys()
    ['a', 'b', 'const', 'cf', 'cd']
    >>> n
    ['const', 'a', 'b', 'cd', 'cf']

    Notes
    -----

    with sorted dict, separate name list would not be necessary
    Ir   const:N   zF:r   zP:* zG:zunknown expression in formula)splitr   Zonesshapeappendr   r   Zc_joinr   
ValueError)ssdatavarsnamesitemvr   r   r   form2designl   s.    $,$
r4   c                 C   s(   |dd }|   D ]}|| q|S )zwdrop names from a list of strings,
    names to drop are in space delimited list
    does not change original list
    N)r)   remove)r.   ZliZnewlir2   r   r   r   dropname   s    r6   __main__i        )sizeabcdr&   efT)flattenr   zI a F:b P:c*dzI a F:b P:c*d G:a*e fc                 C   s   g | ]}t | qS r   xx.0nnr   r   r   
<listcomp>   s     rG   g{Gz?c                 C   s   g | ]}t | qS r   rB   rD   r   r   r   rG      s     zae fZbreedZsexZlitterpenpigZageZbageyzdftest3.data.)missingZusemaskrL   c                 C   s   g | ]}t j|  qS r   )dtamasksum)rE   kr   r   r   rG      s     r   zI F:sex agec                 C   s   g | ]}t | qS r   )xx_b1rD   r   r   r   rG      s     r(    z'I F:breed F:sex F:litter F:pen age bagec                 C   s   g | ]}t | qS r   xx_b1arD   r   r   r   rG     s     z
Results droppingc                 C   s   g | ]}t | qS r   rS   rD   r   r   r   rG     s     )F)L__doc__Zstatsmodels.compat.pythonr   numpyr   Zstatsmodels.apiapismr   r   r   Z
anova_str0Z	anova_strr"   r4   r6   __name__r   randomrandintviewr   ZtestdataintnormalfloatZtestdatacontZnumpy.lib.recfunctionslibZrecfunctionsZ	zip_descrZdt2emptyZtestdataZdtyper1   namerC   nprintZcolumn_stackXrO   rJ   ZOLSfitZrest1paramsZdt_bZ
genfromtxtrM   rN   boolmZreshapelenanyZdroprowsr/   Z
dta_use_b1r*   rQ   Znames_b1ZX_b1Zy_b1Zrest_b1r,   ZallexogrT   Z	names_b1aZX_b1aZy_b1aZrest_b1aZdropnZX_b1a_Zy_b1a_Z	rest_b1a_r   r   r   r   <module>   s   

=

2$

   



