U
    md                     @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ d	d
dddddddddgZdd Zd(ddZdd Zedkred ed eD ]TZee ee\ZZZZZeeeZdddd ed!Zejjeeeed" qed# eD ]RZee ee\ZZZZZd$d% eD Z ej!e  Zeeeedd&   qed' ed edd D ]<Zee ee\ZZZZZeeeZeeee  qtdS ))zcalculating anova and verifying with NIST test data

compares my implementations, stats.f_oneway and anova using statsmodels.OLS
    )lmapN)stats)add_constant)OLS   )
data2dummyz
SiRstv.datz
SmLs01.datz
SmLs02.datz
SmLs03.datzAtmWtAg.datz
SmLs04.datz
SmLs05.datz
SmLs06.dat
SmLs07.dat
SmLs08.dat
SmLs09.datc              	   C   s   t jt}t jt j|d| }t|ddd}| d}W 5 Q R X dd |dd  D }d	d |d
d D }t	j
|dd}|j\}}	|t}t	|}
t|d d }t|d d }t|d d }t|d d }t|d d }tj|||}||	t	||||g||
fS )Ndatarzutf-8)encoding
c                 S   s   g | ]}|  qS  split.0liner   r   k/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/statsmodels/sandbox/regression/anova_nistcertified.py
<listcomp>   s     zgetnist.<locals>.<listcomp><   c                 S   s   g | ]}|r|  qS r   r   r   r   r   r   r      s      (   0   )Zskiprowsr         r   )ospathdirname__file__abspathjoinopenreadr   npZloadtxtTZastypeintuniquefloatr   fsfarray)filenameherefnamefdcontentr   	certifiedZdatafyxcatyr,   R2resstddfbndfwnprobr   r   r   getnist   s"    


r=   c                 C   s*  | d d t jf }|d d t jf |  }ddlm} ||d d d df |d d d df \}}}}	t |j|}
t ||  d |	j}|jd }|jd }|d }|| }|t| }|
t| }|| }t	j
|||}||
|  }t |}dd }t|||||f\}}}}||||fS )Nr   )groupsstats_dummyr   r   c                 S   s   t | dkr| d S | S d S )N)r   r   )r   r   )r'   shape)zr   r   r   _fix2scalarK   s    z!anova_oneway.<locals>._fix2scalar)r'   ZnewaxisZmeanZtry_catdatar>   dotr(   r?   r+   r   r,   r-   sqrtr   )r5   r6   seqZyrvsZxrvsr>   ZmeangZvargZ
xdevmeangrZcountgZsswnZssbnZnobsZncatr:   r;   ZmsbZmswr,   r<   r8   r9   rA   r   r   r   anova_oneway0   s*    


rE   c                 C   s8   t t|dd}t| | }|j|j|jt|j	fS )NF)prepend)
r   r   r   fitZfvalueZf_pvalueZrsquaredr'   rC   Z	mse_resid)r5   r6   Xresr   r   r   	anova_olsT   s    rJ   __main__z
 using new ANOVA anova_onewayzf, prob, R2, resstdgS㥛?g_Q[?g-C6?)r	   r   r
   gHz>)rtolz
 using stats ANOVA f_onewayc                 C   s   g | ]}t t|k qS r   )r6   r5   )r   iir   r   r   r   o   s     r   r   z
 using statsmodels.OLS)r   )"__doc__Zstatsmodels.compat.pythonr   r   numpyr'   Zscipyr   Zstatsmodels.tools.toolsr   Z#statsmodels.regression.linear_modelr   Ztry_ols_anovar   Z
filenamelir=   rE   rJ   __name__printfnr5   r6   certr4   r7   rI   getrL   testingZassert_allcloser.   ZxlistZf_onewayr   r   r   r   <module>   s^   
    
$

 

