U
    mdI                     @   s   d Z ddlZddlm  mZ ddlm  mZ	 ddl
m  mZ ddlmZmZ ddlZddlZddlZG dd dejZG dd deZG dd	 d	eZG d
d dejZG dd deZG dd de	jZeee dS )zA
Conditional logistic, Poisson, and multinomial logit regression
    N)MultinomialResultsMultinomialResultsWrapperc                	       sP   e Zd Zd fdd	Zdd Zd fdd	ZdddZed fdd	Z  Z	S )_ConditionalModelnonec                    s  d|krt d|d }|j|jkr0d}t ||jd |jkrLd}t |tt| j||fd|i| | jjd k	rd}t || j}|jd | _	i }t
|D ]&\}}	|	|krg ||	< ||	 | qt|t| }}|d	}
g | _g | _g | _|
d k	rt|
}
g | _g | _g | _d| _ddg}| D ]\}	}|| j}t|dkr|d  d7  < |d  t|7  < q6|  jt|7  _| j| |
d k	r| j|
|  | jt| | j||d d f  | jt| q6|d dkrd
t| }t| |
d k	rZg | _t
| jD ]$\}}| jt| j| | q4t| j| _ g | _!g | _"t#| j D ]>}	| j!t| j|	 | j|	  | j"t| j|	  q|d S )Ngroups'groups' is a required argumentz4'endog' and 'groups' should have the same dimensionsr   zBThe leading dimension of 'exog' should equal the length of 'endog'missingzDConditional models should not have an intercept in the design matrix   offsetzIDropped %d groups and %d observations for having no within-group variance)$
ValueErrorsizeshapesuperr   __init__dataZ	const_idxexogk_params	enumerateappendnpZasarrayget
_endog_grp	_exog_grp
_groupsize_offset_grp_offset_sumynobsitemsZflatZstdlensumtuplewarningswarn_endofsdot	_n_groups_xy_n1range)selfendogr   r   kwargsr   msgZrow_ixigr
   ZdropsZixykofs	__class__ `/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/statsmodels/discrete/conditional_models.pyr      s    
 






 z_ConditionalModel.__init__c                 C   s&   ddl m} ||| j}t|}|S )Nr   )approx_fprime)Zstatsmodels.tools.numdiffr7   scorer   Z
atleast_2d)r*   paramsr7   Zhessr5   r5   r6   hessianb   s    
z_ConditionalModel.hessianNBFGSd   TFr5   c
                    s~   t t| j||||||	d}t| |j| d}||_| j|_| j|_	dt
| j dt| j dt| j g|_t|}|S )Nstart_paramsmethodmaxiterfull_outputdispskip_hessianr	   z%dz%.1f)r   r   fitConditionalResultsr9   Z
cov_paramsr?   r   r&   n_groupsminr   maxr   Zmean_group_statsConditionalResultsWrapper)r*   r>   r?   r@   rA   rB   fargscallbackretallrC   r,   rsltZcrsltr3   r5   r6   rD   i   s$    
z_ConditionalModel.fitelastic_net        c                 K   sN   ddl m} |dkrtdddddd}|| || f||||d	|S )
a  
        Return a regularized fit to a linear regression model.

        Parameters
        ----------
        method : {'elastic_net'}
            Only the `elastic_net` approach is currently implemented.
        alpha : scalar or array_like
            The penalty weight.  If a scalar, the same penalty weight
            applies to all variables in the model.  If a vector, it
            must have the same length as `params`, and contains a
            penalty weight for each coefficient.
        start_params : array_like
            Starting values for `params`.
        refit : bool
            If True, the model is refit using only the variables that
            have non-zero coefficients in the regularized fit.  The
            refitted model is not regularized.
        **kwargs
            Additional keyword argument that are used when fitting the model.

        Returns
        -------
        Results
            A results instance.
        r   )fit_elasticnetrO   z.method for fit_regularized must be elastic_net2   r	   g|=)r@   ZL1_wtZ	cnvrg_tolZzero_tol)r?   alphar>   refit)Zstatsmodels.base.elastic_netrQ   r   update)r*   r?   rS   r>   rT   r,   rQ   defaultsr5   r5   r6   fit_regularized   s    !
z!_ConditionalModel.fit_regularizedc           	         s   z|d }|d= W n t k
r.   tdY nX t|trB|| }d|ddkr\td tt| j	|f|||d|}|S )Nr   r   z0+  z2Conditional models should not include an intercept)r   r   )
KeyErrorr   
isinstancestrreplacer"   r#   r   r   from_formula)	clsZformular   ZsubsetZ	drop_colsargsr,   r   modelr3   r5   r6   r^      s(    	



 z_ConditionalModel.from_formula)r   )	Nr;   r<   TFr5   NFF)rO   rP   NF)NN)
__name__
__module____qualname__r   r:   rD   rW   classmethodr^   __classcell__r5   r5   r3   r6   r      s*   P         !    
1  r   c                       sV   e Zd ZdZd fdd	Zdd Zdd Zdd
dZdddZdd Z	dd Z
  ZS )ConditionalLogita  
    Fit a conditional logistic regression model to grouped data.

    Every group is implicitly given an intercept, but the model is fit using
    a conditional likelihood in which the intercepts are not present.  Thus,
    intercept estimates are not given, but the other parameter estimates can
    be interpreted as being adjusted for any group-level confounders.

    Parameters
    ----------
    endog : array_like
        The response variable, must contain only 0 and 1.
    exog : array_like
        The array of covariates.  Do not include an intercept
        in this array.
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.
    r   c                    sX   t t| j||fd|i| tt| jtjd krFd}t|| j	j
d | _d S )Nr   )r   r	   zendog must be coded as 0, 1r	   )r   rg   r   r   anyuniquer+   Zr_r   r   r   K)r*   r+   r   r   r,   r-   r3   r5   r6   r      s    
 zConditionalLogit.__init__c                 C   s,   d}t t| jD ]}|| ||7 }q|S Nr   )r)   r   r   loglike_grp)r*   r9   llr/   r5   r5   r6   loglike   s    zConditionalLogit.loglikec                 C   s(   d}t | jD ]}|| ||7 }q|S rk   )r)   r&   	score_grp)r*   r9   r8   r/   r5   r5   r6   r8      s    zConditionalLogit.scoreNc                    sR   |d krd}t t | j| ||  i  fdd| j| | j| S )Nr   c                    sx   | |k rdS |dkrdS z| |f W S  t k
r:   Y nX | d || d |d  | d    }|| |f< |S )Nr   r	   )rZ   )tr1   vexbfmemor5   r6   rt     s    ,z"ConditionalLogit._denom.<locals>.f)r   expr%   r   r   r(   r*   grpr9   r2   r5   rr   r6   _denom  s    zConditionalLogit._denomc                    sZ   |d krd}j |  tt || i  fddj| j| S )Nr   c           
         s   | |k rdt jfS |dkr$dS z| |f W S  tk
rF   Y nX | d  }| d |\}}| d |d \}}||  | d d d f  }|||  || ||   }}	||	f| |f< ||	fS )Nr   )r	   r   r	   )r   zerosr   rZ   )
rp   r1   habcedurq   exrs   ru   sr*   r5   r6   r   .  s    z'ConditionalLogit._denom_grad.<locals>.s)r   r   rv   r%   r   r(   rw   r5   r   r6   _denom_grad"  s    
zConditionalLogit._denom_gradc                 C   s\   d }t | dr| j| }t| j| |}|d k	r@|| j| 7 }|t| |||8 }|S )Nr
   )hasattrr   r   r%   r'   r$   logry   )r*   rx   r9   r2   Zllgr5   r5   r6   rl   F  s    

zConditionalLogit.loglike_grpc                 C   s<   d}t | dr| j| }| |||\}}| j| ||  S )Nr   r
   )r   r   r   r'   )r*   rx   r9   r2   r   r{   r5   r5   r6   ro   U  s
    

zConditionalLogit.score_grp)r   )N)N)rb   rc   rd   __doc__r   rn   r8   ry   r   rl   ro   rf   r5   r5   r3   r6   rg      s   

$rg   c                   @   s    e Zd ZdZdd Zdd ZdS )ConditionalPoissonaU  
    Fit a conditional Poisson regression model to grouped data.

    Every group is implicitly given an intercept, but the model is fit using
    a conditional likelihood in which the intercepts are not present.  Thus,
    intercept estimates are not given, but the other parameter estimates can
    be interpreted as being adjusted for any group-level confounders.

    Parameters
    ----------
    endog : array_like
        The response variable
    exog : array_like
        The covariates
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.
    c           	      C   s   d }t | dr| j}d}tt| jD ]n}t| j| |}|d k	rP||| 7 }t|}| j| }|t||7 }|	 }|| j
| t| 8 }q&|S Nr
   rP   )r   r   r)   r   r   r   r%   r   rv   r    r   r   )	r*   r9   r2   rm   r.   xbrs   r0   r   r5   r5   r6   rn   r  s    


zConditionalPoisson.loglikec           
      C   s   d }t | dr| j}d}tt| jD ]x}| j| }t||}|d k	rT||| 7 }t|}|	 }| j| }	|t|	|7 }|| j
| t|| | 8 }q&|S r   )r   r   r)   r   r   r   r   r%   rv   r    r   )
r*   r9   r2   r8   r.   xr   rs   r   r0   r5   r5   r6   r8     s    



 zConditionalPoisson.scoreN)rb   rc   rd   r   rn   r8   r5   r5   r5   r6   r   _  s   r   c                       s&   e Zd Z fddZdddZ  ZS )rE   c                    s   t t| j||||d d S )N)normalized_cov_paramsscale)r   rE   r   )r*   ra   r9   r   r   r3   r5   r6   r     s    
zConditionalResults.__init__N皙?c           	      C   s   dddd| j gfddg}dd| jgfd	| jd
 gfd| jd gfd| jd gfg}|dkr^d}d
dlm} | }|j| |||||d |j| |||| jd |S )a<  
        Summarize the fitted model.

        Parameters
        ----------
        yname : str, optional
            Default is `y`
        xname : list[str], optional
            Names for the exogenous variables, default is "var_xx".
            Must match the number of parameters in the model
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            Significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results
        )zDep. Variable:N)zModel:N)zLog-Likelihood:NzMethod:)zDate:N)zTime:N)zNo. Observations:NzNo. groups:zMin group size:r   zMax group size:r	   zMean group size:   Nz*Conditional Logit Model Regression Results)Summary)ZgleftZgrightynamexnametitle)r   r   rS   use_t)r?   rF   rI   Zstatsmodels.iolib.summaryr   Zadd_table_2colsZadd_table_paramsr   )	r*   r   r   r   rS   top_left	top_rightr   Zsmryr5   r5   r6   summary  sB    


    zConditionalResults.summary)NNNr   )rb   rc   rd   r   r   rf   r5   r5   r3   r6   rE     s   rE   c                	       s<   e Zd ZdZd fdd	ZdddZdd Zdd Z  ZS )ConditionalMNLogita  
    Fit a conditional multinomial logit model to grouped data.

    Parameters
    ----------
    endog : array_like
        The dependent variable, must be integer-valued, coded
        0, 1, ..., c-1, where c is the number of response
        categories.
    exog : array_like
        The independent variables.
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.

    Notes
    -----
    Equivalent to femlogit in Stata.

    References
    ----------
    Gary Chamberlain (1980).  Analysis of covariance with qualitative
    data. The Review of Economic Studies.  Vol. 47, No. 1, pp. 225-238.
    r   c                    s  t t| j||fd|i| | jt| _| j d | _| jd | jj	d  | _
| j| j
 | _dd t| jD | _| j| _| jj	d | _| j dk rd}t|tt t| jD ]\}} | | qt  | _| j   fdd| jD | _d S )	Nr   r	   c                 S   s   i | ]}|t |qS r5   )r\   ).0jr5   r5   r6   
<dictcomp>  s      z/ConditionalMNLogit.__init__.<locals>.<dictcomp>r   z%endog may not contain negative valuesc                    s   g | ]} | qS r5   r5   )r   r1   Zgrxr5   r6   
<listcomp>  s     z/ConditionalMNLogit.__init__.<locals>.<listcomp>)r   r   r   r+   ZastypeintrH   k_catr   r   Zdf_modelr   Zdf_residr)   Z_ynames_mapJrj   rG   r   collectionsdefaultdictlistr   r   r   keysZ_group_labelssort_grp_ix)r*   r+   r   r   r,   r-   r1   rq   r3   r   r6   r      s0    
 

zConditionalMNLogit.__init__Nr;   r<   TFr5   c
              	   K   s   |d kr0| j jd }| jd }tjj|| d}tjj| ||||||	d}|j	
| j jd df|_	t| |}|jtjd t|S )Nr	   )r   r=   )Zllnull)r   r   r   r   randomnormalbaseLikelihoodModelrD   r9   reshaper   Zset_null_optionsnanr   )r*   r>   r?   r@   rA   rB   rK   rL   rM   rC   r,   qr~   rN   r5   r5   r6   rD     s"    
	
zConditionalMNLogit.fitc                 C   s   | j jd }| jd }|||f}tjt|df|fdd}t| j |}d}| jD ]~}||d d f }tj	|jd t
d}	| j| }
d}t|
D ]}|t||	|f  7 }q|||	|
f  t| 7 }qX|S )Nr	   ZaxisrP   r   Zdtype)r   r   r   r   r   concatenaterz   r%   r   aranger   r+   	itertoolspermutationsrv   r    r   )r*   r9   r   r~   pmatlprrm   iir   jjr0   denompr5   r5   r6   rn   ?  s    


 zConditionalMNLogit.loglikec                 C   s  | j jd }| jd }|||f}tjt|df|fdd}t| j |}t||f}| jD ]}||d d f }tj	|jd t
d}	| j| }
d}t||f}t|
D ]n}t||	|f  }||7 }t|D ]B\}}|dkr|d d |d f  || j || d d f  7  < qqt|
D ]B\}}|dkr*|d d |d f  | j || d d f 7  < q*||| 8 }qb| S )Nr	   r   r   r   rP   )r   r   r   r   r   r   rz   r%   r   r   r   r+   r   r   rv   r    r   flatten)r*   r9   r   r~   r   r   Zgradr   r   r   r0   r   Zdenomgr   rq   r.   rr5   r5   r6   r8   U  s.    

6
2zConditionalMNLogit.score)r   )	Nr;   r<   TFr5   NFF)	rb   rc   rd   r   r   rD   rn   r8   rf   r5   r5   r3   r6   r     s            
%r   c                   @   s   e Zd ZdS )rJ   N)rb   rc   rd   r5   r5   r5   r6   rJ   v  s   rJ   )r   numpyr   Zstatsmodels.base.modelr   ra   Z#statsmodels.regression.linear_modelZ
regressionZlinear_modelZlmZstatsmodels.base.wrapperwrapperwrapZ#statsmodels.discrete.discrete_modelr   r   r   r"   r   r   r   rg   r   ZLikelihoodModelResultsrE   r   ZRegressionResultsWrapperrJ   Zpopulate_wrapperr5   r5   r5   r6   <module>   s$    F ?I 