U
    mda                     @   sn   d Z ddlmZ ddlZddlmZ dd Zdd Zd	d
 Z	dd Z
dddZdd ZdddZdddZdS )a  
Working with categorical data
=============================

use of dummy variables, group statistics, within and between statistics
examples for efficient matrix algebra

dummy versions require that the number of unique groups or categories is not too large
group statistics with scipy.ndimage can handle large number of observations and groups
scipy.ndimage stats is missing count

new: np.bincount can also be used for calculating values per label
    )lrangeN)ndimagec                 C   s2   t t | d }t tj|| |d}||  S )N   labelsindex)nparangemaxarrayr   mean)yxlabelsunique
labelmeans r   c/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/statsmodels/sandbox/regression/try_catdata.pylabelmeanfilter   s    r   c              
   C   s   t t | d }g }g }|jD ]2}t tj|| |d}|||   || q"t tj| |d |d d d| |d}|t |t |jfS )Nr   r   r   )	r   r	   r
   Tr   r   r   appendZ	histogram)r   r   r   ZlabmeansdataZlabmeansZxxr   Z
labelcountr   r   r   labelmeanfilter_nd"   s    
  r   c              
   C   sF   t j| ddd\}}t tj||t t |d d}|| }|S )NFTZreturn_indexreturn_inverser   r   )r   uniquer   r   r   r	   r
   )ysr   unilunilinvr   Zarr3r   r   r   labelmeanfilter_str7   s    &r   c           
      C   sv   t | }tj| dd\}}t|}tj||dd|  }|| }tj||| d dd|  }|| }	|||||	fS )z:uses np.bincount, assumes factors/labels are integers
    r   )r   )weightsg      ?   )lenr   r   Zbincount)
ZfactorsvaluesnZixZrindZgcountZgmeanZmeanarrZ	withinvarZwithinvararrr   r   r   groupstatsbin?   s    
r$   c                 C   s|   |dkr| }nFt |}|jdkrP| jdkrPt dd | D ddt jf }n| }t j|ddd\}}|t t||fS )	zconvert labels based on multiple variables or string labels to unique
    index labels 0,1,2,...,nk-1 where nk is the number of distinct labels
    Nr   r    c                 S   s    g | ]}d |dd    qS )z@%s@Nr    )tostring).0iir   r   r   
<listcomp>U   s     z!convertlabels.<locals>.<listcomp>FTr   )r   r   sizendimnewaxisr   r	   r!   )r   indicesZylabelidxr   r   r   r   r   convertlabelsL   s    
$r.   c                 C   s4   t tj|| |d}t tj|| |d}||fS )z)use ndimage to get fast mean and variancer   )r   r   r   r   var)r   r   r   r   Z	labelvarsr   r   r   groupsstats_1d`   s    r0   c                 C   s   |s| j dkr8| jd dkr8t| t| jd \}}}n$|  }|  }t||  d }|j dkrx|d d tj	f }||k
t}|S )Nr    r   )r*   shaper.   r   copyminr   r	   r
   r+   Zastypeint)r   nonseqZycatZuniquesZ	unitranslZymindummyr   r   r   	cat2dummyf   s    
r7   c           	      C   s   |j dkr|d d tjf }t| |d}|jdtd}t|j|| }t||j}|| }t|| j|| }||||fS )Nr   )r5   r   )Zdtype)r*   r   r+   r7   sumfloatdotr   )	r   r   r5   r6   ZcountgrZmeangrZmeandataZ
xdevmeangrZvargrr   r   r   groupsstats_dummys   s    
r;   )N)r   )r   )__doc__Zstatsmodels.compat.pythonr   numpyr   Zscipyr   r   r   r   r$   r.   r0   r7   r;   r   r   r   r   <module>   s   

