U
    md#                     @   s   d dl mZmZ d dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ eeeeeeedd	dd
dd
dd i ddd
eeee eee eee ee eeeeeeejf  dddZdS )    )OptionalTuple)AnnDataN)experimental)pca)	doc_adatadoc_dist_paramsdoc_genes_batch_chunkdoc_pca_chunk	doc_layerdoc_check_valuesdoc_inplace)_doc_params)adataZdist_paramsZgenes_batch_chunkZ	pca_chunkcheck_valuesinplaced   i  2   T)
thetaclipn_top_genes	batch_key	chunksizen_compsrandom_state
kwargs_pcar   r   )r   r   r   r   r   r   r   r   r   r   r   returnc       
      	   C   s:  t d||||||	d}|
rNtjj| f|ddi | dd| jd f  }n0tjj| f|ddi}| dd|d f  }tjj||||	d t|f||d	| |
r.|jd
 }t f |d|	 i}|jd | jd< t
j| j|fd| jd< |jd | jd | jd < || jd
< |jd | jd< dS ||fS dS )a	      Full pipeline for HVG selection and normalization by analytic Pearson residuals ([Lause21]_).

    Applies gene selection based on Pearson residuals. On the resulting subset,
    Pearson residual normalization and PCA are performed.

    Expects raw count input.

    Params
    ------
    {adata}
    {dist_params}
    {genes_batch_chunk}
    {pca_chunk}
    {check_values}
    {inplace}

    Returns
    -------
    If `inplace=False`, separately returns the gene selection results (as
    :class:`~pandas.DataFrame`) and Pearson residual-based PCA results (as
    :class:`~anndata.AnnData`). If `inplace=True`, updates `adata` with the
    following fields for gene selection results:

    `.var['highly_variable']` : bool
        boolean indicator of highly-variable genes.
    `.var['means']` : float
        means per gene.
    `.var['variances']` : float
        variances per gene.
    `.var['residual_variances']` : float
        Pearson residual variance per gene. Averaged in the case of multiple
        batches.
    `.var['highly_variable_rank']` : float
        Rank of the gene according to residual variance, median rank in the
        case of multiple batches.
    `.var['highly_variable_nbatches']` : int
        If batch_key is given, this denotes in how many batches genes are
        detected as HVG.
    `.var['highly_variable_intersection']` : bool
        If batch_key is given, this denotes the genes that are highly variable
        in all batches.

    The following fields contain Pearson residual-based PCA results and
    normalization settings:

    `.uns['pearson_residuals_normalization']['pearson_residuals_df']`
         The subset of highly variable genes, normalized by Pearson residuals.
    `.uns['pearson_residuals_normalization']['theta']`
         The used value of the overdisperion parameter theta.
    `.uns['pearson_residuals_normalization']['clip']`
         The used value of the clipping parameter.

    `.obsm['X_pca']`
        PCA representation of data after gene selection and Pearson residual
        normalization.
    `.varm['PCs']`
         The principal components containing the loadings. When `inplace=True` this
         will contain empty rows for the genes not selected during HVG selection.
    `.uns['pca']['variance_ratio']`
         Ratio of explained variance.
    `.uns['pca']['variance']`
         Explained variance, equivalent to the eigenvalues of the covariance matrix.
    Zpearson_residuals)Zflavorr   r   r   r   r   r   r   TNZhighly_variableF)r   r   r   )r   r   Zpearson_residuals_normalizationZpearson_residuals_dfr   )shapeZPCsZX_pca)dictr   ppZhighly_variable_genesvarcopyZnormalize_pearson_residualsr   ZunsZto_dfnpZzerosZn_varsZvarmZobsm)r   r   r   r   r   r   r   r   r   r   r   Zhvg_argsZ	adata_pcaZhvgZnormalization_paramZnormalization_dict r#   X/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/experimental/pp/_recipes.pyrecipe_pearson_residuals   sF    W
   

r%   )typingr   r   Zanndatar   Zpandaspdnumpyr"   Zscanpyr   Zscanpy.preprocessingr   Zscanpy.experimental._docsr   r   r	   r
   r   r   r   Zscanpy._utilsr   floatintstrr   boolZ	DataFramer%   r#   r#   r#   r$   <module>   sN   $	