U
    mdg                      @   sn  d dl mZmZ d dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZm Z  dddZ!eeeeeeedddddddde
e"ee" e#ee$ e#e#eee$ej%f  dddZ&eeee eeddddd i dddde
e"ee" ee' ee" ee( ee# e#e#ee
 d
ddZ)dS )    )OptionalDict)warnN)AnnData)issparse)logging)view_to_actualcheck_nonnegative_integers)_get_obs_rep_set_obs_rep)_doc_params)pca)	doc_adatadoc_dist_params	doc_layerdoc_check_valuesdoc_copydoc_inplacedoc_pca_chunkFc                 C   s  |r|   n| } |dkr td|d kr<| jd }t|}|dk rLtd|rbt| sbtdt t| rtj	| dd}tj	| dd}t	|
 }n*tj	| ddd}tj	| ddd}t	|}t|| | }	t| |	 }
|
t|	|	d	 |   }tj|| |d
}|S )Nr   z#Pearson residuals require theta > 0z3Pearson residuals require `clip>=0` or `clip=None`.zT`normalize_pearson_residuals()` expects raw count data, but non-integers were found.)axis   T)r   Zkeepdims   )Za_minZa_max)copy
ValueErrorshapenpsqrtr	   r   UserWarningr   sumZsqueezearrayclip)Xthetar    check_valuesr   nZ
sums_genesZ
sums_cells	sum_totalmudiff	residuals r)   ^/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/experimental/pp/_normalization.py_pearson_residuals   s2    


r+   )adatadist_paramsr#   layerinplacer   d   T)r"   r    r#   r.   r/   r   )r,   r"   r    r#   r.   r/   r   returnc                C   s   |r|st d|  } t|  t| |d}|r4|nd}d| }	t|	}
t||||| d}t|||d}|rt| ||d || j	d< ntf d|i|}tjd	|
d
 |r| S |s|S dS )aH      Applies analytic Pearson residual normalization, based on [Lause21]_.

    The residuals are based on a negative binomial offset model with overdispersion
    `theta` shared across genes. By default, residuals are clipped to `sqrt(n_obs)`
    and overdispersion `theta=100` is used.

    Expects raw count input.

    Params
    ------
    {adata}
    {dist_params}
    {check_values}
    {layer}
    {inplace}
    {copy}

    Returns
    -------
    If `inplace=True`, `adata.X` or the selected layer in `adata.layers` is updated
    with the normalized values. `adata.uns` is updated with the following fields.
    If `inplace=False`, the same fields are returned as dictionary with the
    normalized values in `results_dict['X']`.

    `.uns['pearson_residuals_normalization']['theta']`
         The used value of the overdisperion parameter theta.
    `.uns['pearson_residuals_normalization']['clip']`
         The used value of the clipping parameter.
    `.uns['pearson_residuals_normalization']['computed_on']`
         The name of the layer on which the residuals were computed.
    z0`copy=True` cannot be used with `inplace=False`.)r.   zadata.Xz(computing analytic Pearson residuals on )r   )r"   r    computed_onpearson_residuals_normalizationr!   z    finished ({time_passed}))timeN)
r   r   r   r
   logginfor+   dictr   uns)r,   r"   r    r#   r.   r/   r   r!   r2   msgstartr(   Zsettings_dictZresults_dictr)   r)   r*   normalize_pearson_residualsC   s(    3

r;   )r,   r-   Z	pca_chunkr#   r/   2   )r"   r    n_compsrandom_state
kwargs_pcause_highly_variabler#   r/   )
r,   r"   r    r=   r>   r?   r@   r#   r/   r1   c                C   s\  |rd|   krtd|dkr0d|   kr0d}|rp| dd| jd f  }	t|	j |	jg  |	jg  d}
n t| j | jg  | jg  d}
t|
|||d t|
f||d| |rT|
j	d }t
f |d	|
 i}|rtj| j|fd
| jd< |
jd | jd | jd < n|
jd | jd< |
j	d | j	d< || j	d< |
jd | jd< dS |
S dS )aT      Applies analytic Pearson residual normalization and PCA, based on [Lause21]_.

    The residuals are based on a negative binomial offset model with overdispersion
    `theta` shared across genes. By default, residuals are clipped to `sqrt(n_obs)`,
    overdispersion `theta=100` is used, and PCA is run with 50 components.

    Operates on the subset of highly variable genes in `adata.var['highly_variable']`
    by default. Expects raw count input.

    Params
    ------
    {adata}
    {dist_params}
    {pca_chunk}
    use_highly_variable
        If `True`, uses gene selection present in `adata.var['highly_variable']` to
        subset the data before normalizing (default). Otherwise, proceed on the full
        dataset.
    {check_values}
    {inplace}

    Returns
    -------
    If `inplace=False`, returns the Pearson residual-based PCA results (as :class:`~anndata.AnnData`
    object). If `inplace=True`, updates `adata` with the following fields:

    `.uns['pearson_residuals_normalization']['pearson_residuals_df']`
         The subset of highly variable genes, normalized by Pearson residuals.
    `.uns['pearson_residuals_normalization']['theta']`
         The used value of the overdisperion parameter theta.
    `.uns['pearson_residuals_normalization']['clip']`
         The used value of the clipping parameter.

    `.obsm['X_pca']`
        PCA representation of data after gene selection (if applicable) and Pearson
        residual normalization.
    `.varm['PCs']`
         The principal components containing the loadings. When `inplace=True` and
         `use_highly_variable=True`, this will contain empty rows for the genes not
         selected.
    `.uns['pca']['variance_ratio']`
         Ratio of explained variance.
    `.uns['pca']['variance']`
         Explained variance, equivalent to the eigenvalues of the covariance matrix.
    Zhighly_variablezYou passed `use_highly_variable=True`, but no HVG selection was found (e.g., there was no 'highly_variable' column in adata.var).'NT)obsvar)r"   r    r#   )r=   r>   r3   Zpearson_residuals_df)r   ZPCsr   ZX_pca)Zvar_keysr   rB   r   r   r!   rA   r;   r   r8   r7   Zto_dfr   ZzerosZn_varsZvarmZobsm)r,   r"   r    r=   r>   r?   r@   r#   r/   Z	adata_subZ	adata_pcaZnorm_settingsZ	norm_dictr)   r)   r*   normalize_pearson_residuals_pca   sB    C      

rC   )F)*typingr   r   warningsr   numpyr   ZpandaspdZanndatar   Zscipy.sparser   Zscanpyr   r5   Zscanpy._utilsr   r	   Z
scanpy.getr
   r   r   Zscanpy.preprocessing._pcar   Zscanpy.experimental._docsr   r   r   r   r   r   r   r+   floatboolstrZndarrayr;   intr7   rC   r)   r)   r)   r*   <module>   s~   $
)H
