U
    mdH7                     @   sN  d dl mZmZmZmZ d dlmZ d dlZd dlZ	d dl
Zd dlmZmZmZ d dlmZmZ d dlmZ d dlmZ dd	lmZmZmZmZmZmZ d
dlmZ d2ddZeeeeeedddddddddddd
ee e ee  eee!  ee  e"ee  e"eej# d
ddZ$eeeeedddddddddee e ee  e"eej# dddZ%eeeeeeeddddddddddd	ee e ee  eee!  ee  e"e"e"ee" eeej#ej#f  d d!d"Z&ee	j'ef e!d#d$d%Z(d&d' Z)d(d) Z*ee	j'ef ee! e	j+d*d+d,Z,ee	j'ef ee! e	j+d*d-d.Z-ej.ddd/d0d1 Z/dS )3    )OptionalTuple
CollectionUnion)warnN)issparseisspmatrix_csrisspmatrix_coo)spmatrix
csr_matrix)mean_variance_axis)AnnData   )doc_expr_repsdoc_obs_qc_argsdoc_qc_metric_namingdoc_obs_qc_returnsdoc_var_qc_returnsdoc_adata_basic   )_doc_paramsFc                 C   sJ   |d k	}|r&|r&t d| d| d|r4| j| S |r@| jjS | jS d S )NzECannot use expression from both layer and raw. You provided:'use_raw=z' and 'layer=')
ValueErrorZlayersrawX)adatause_rawlayerZis_layer r   Q/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/preprocessing/_qc.py_choose_mtx_rep   s    
r    )r   r   r   r   r   countsZgenesr   )2   d      i  T)
	expr_typevar_typeqc_varspercent_topr   r   log1pinplacer   parallel)
r   r%   r&   r'   r(   r   r   r)   r*   returnc       
      	   C   s   |
dk	rt dt |	dkrFt| ||}	t|	r6t|	}	t|	rF|	  tj| j	d}t|	rz|	j
dd|d| d| < ntj|	dd|d| d| < |rt|d| d|  |d| d| < t|	jdd|d	| < |rt|d	|  |d
| < |r\t|}t|	|}t|D ]4\}}|dd|f d |d| d| d| < q&|D ]}t|	dd| j| jf jdd|d	| d| < |rt|d	| d|  |d
| d| < |d	| d|  |d	|   d |d| d| < q`|r|| j|j< n|S dS )a      Describe observations of anndata.

    Calculates a number of qc metrics for observations in AnnData object. See
    section `Returns` for a description of those metrics.

    Note that this method can take a while to compile on the first call. That
    result is then cached to disk to be used later.

    Params
    ------
    {doc_adata_basic}
    {doc_qc_metric_naming}
    {doc_obs_qc_args}
    {doc_expr_reps}
    log1p
        Add `log1p` transformed metrics.
    inplace
        Whether to place calculated metrics in `adata.obs`.
    X
        Matrix to calculate values on. Meant for internal usage.

    Returns
    -------
    QC metrics for observations in adata. If inplace, values are placed into
    the AnnData's `.obs` dataframe.

    {doc_obs_qc_returns}
    N?Argument `parallel` is deprecated, and currently has no effect.indexr   ZaxisZn_Z_by_Zlog1p_n_Ztotal_Zlog1p_total_r#   Zpct_Z_in_top__)r   FutureWarningr    r	   r   r   eliminate_zerospd	DataFrameZ	obs_namesgetnnznpcount_nonzeror)   ravelsumsortedtop_segment_proportions	enumeratevarvaluesZobscolumns)r   r%   r&   r'   r(   r   r   r)   r*   r   r+   obs_metricsZproportionsinZqc_varr   r   r   describe_obs&   s^    2
rD   )r   r   r   r   )r%   r&   r   r   r*   r)   r   )r   r%   r&   r   r   r,   c                C   s8  |dkr4t | ||}t|r$t|}t|r4|  tj| jd}t|rp|jdd|d< t	|ddd |d< n"t
j|dd|d< |jdd|d< |rt
|d |d< d|d |jd   d	 |d
< t
|jdd|d< |rt
|d |d< g }	|jD ]}
|	|
jf t  q|	|_|r0|| j|j< n|S dS )a9      Describe variables of anndata.

    Calculates a number of qc metrics for variables in AnnData object. See
    section `Returns` for a description of those metrics.

    Params
    ------
    {doc_adata_basic}
    {doc_qc_metric_naming}
    {doc_expr_reps}
    inplace
        Whether to place calculated metrics in `adata.var`.
    X
        Matrix to calculate values on. Meant for internal usage.

    Returns
    -------
    QC metrics for variables in adata. If inplace, values are placed into the
    AnnData's `.var` dataframe.

    {doc_var_qc_returns}
    Nr.   r   r0   zn_cells_by_{expr_type}zmean_{expr_type}zlog1p_mean_{expr_type}r   r#   zpct_dropout_by_{expr_type}ztotal_{expr_type}zlog1p_total_{expr_type})r    r	   r   r   r3   r4   r5   Z	var_namesr6   r   r7   r8   Zmeanr)   shaper9   r:   r@   appendformatlocalsr>   )r   r%   r&   r   r   r*   r)   r   var_metricsZnew_colnamescolr   r   r   describe_var   s>    )
rK   )r   r   r   r   r   r   )	r%   r&   r'   r(   r   r   r*   r)   r+   )r   r%   r&   r'   r(   r   r   r*   r)   r+   r,   c       	      
   C   sz   |	dk	rt dt t| ||}
t|
r.t|
}
t|
r>|
  t| ||||||
|d}t| ||||
|d}|sv||fS dS )a      Calculate quality control metrics.

    Calculates a number of qc metrics for an AnnData object, see section
    `Returns` for specifics. Largely based on `calculateQCMetrics` from scater
    [McCarthy17]_. Currently is most efficient on a sparse CSR or dense matrix.

    Note that this method can take a while to compile on the first call. That
    result is then cached to disk to be used later.

    Parameters
    ----------
    {doc_adata_basic}
    {doc_qc_metric_naming}
    {doc_obs_qc_args}
    {doc_expr_reps}
    inplace
        Whether to place calculated metrics in `adata`'s `.obs` and `.var`.
    log1p
        Set to `False` to skip computing `log1p` transformed annotations.

    Returns
    -------
    Depending on `inplace` returns calculated metrics
    (as :class:`~pandas.DataFrame`) or updates `adata`'s `obs` and `var`.

    {doc_obs_qc_returns}

    {doc_var_qc_returns}

    Example
    -------
    Calculate qc metrics for visualization.

    .. plot::
        :context: close-figs

        import scanpy as sc
        import seaborn as sns

        pbmc = sc.datasets.pbmc3k()
        pbmc.var["mito"] = pbmc.var_names.str.startswith("MT-")
        sc.pp.calculate_qc_metrics(pbmc, qc_vars=["mito"], inplace=True)
        sns.jointplot(
            data=pbmc.obs,
            x="log1p_total_counts",
            y="log1p_n_genes_by_counts",
            kind="hex",
        )

    .. plot::
        :context: close-figs

        sns.histplot(pbmc.obs["pct_counts_mito"])
    Nr-   )r%   r&   r'   r(   r*   r   r)   )r%   r&   r*   r   r)   )	r   r2   r    r	   r   r   r3   rD   rK   )r   r%   r&   r'   r(   r   r   r*   r)   r+   r   rA   rI   r   r   r   calculate_qc_metrics   s<    L
	rL   mtxrC   c                 C   s<   t | r.t| st| } t| j| jt|S t| |S dS )a^      Calculates cumulative proportions of top expressed genes

    Parameters
    ----------
    mtx
        Matrix, where each row is a sample, each column a feature.
    n
        Rank to calculate proportions up to. Value is treated as 1-indexed,
        `n=50` will calculate cumulative proportions up to the 50th most
        expressed gene.
    N)	r   r   r   top_proportions_sparse_csrdataindptrr7   arraytop_proportions_denserM   r   r   r   top_proportionsI  s
    rT   c                 C   s   | j dd}ttjd|  |d }|d d d |f }tj|tjd}t|jd D ]P}| |||d d f f }|d d d   t	|||  }|||d d f< qV|S )Nr   r0   dtyper   )
r:   r7   apply_along_axisZargpartitionZ
zeros_likefloat64rangerE   sortcumsum)rN   rC   sumspartitionedr?   rB   vecr   r   r   rS   _  s    rS   c           	      C   s   t j|jd |ft jd}t|jd D ]}|| ||d   }}t j|t jd}|| |kr| || |d || < | }n<t | ||  |d d |  |d d < | ||  }|d d d   |	 | ||d d f< q*|S )Nr   rU   rW   )
r7   zerossizerY   numbapranger:   	partitionr[   r\   )	rP   rQ   rC   r?   rB   startendr_   totalr   r   r   rO   l  s    
,rO   )rN   nsr,   c                 C   sb   t || jd krt|dks&tdt| rTt| s>t| } t| j| j	t
|S t| |S dS )as  
    Calculates total percentage of counts in top ns genes.

    Parameters
    ----------
    mtx
        Matrix, where each row is a sample, each column a feature.
    ns
        Positions to calculate cumulative proportion at. Values are considered
        1-indexed, e.g. `ns=[50]` will calculate cumulative proportion up to
        the 50th most expressed gene.
    r   r   z$Positions outside range of features.N)maxrE   min
IndexErrorr   r   r   "top_segment_proportions_sparse_csrrP   rQ   r7   rR   top_segment_proportions_dense)rN   rh   r   r   r   r<   |  s    r<   c           	      C   s   t |}| jdd}t t jd| | jd | d d d d df d d d |d f }t | jd t|f}t | jd }d}t|D ]<\}}||d d ||f jdd7 }||d d |f< |}q||d d d f  S )Nr   r0   rW   r   )	r7   r[   r:   rX   rd   rE   r`   lenr=   )	rN   rh   r]   r^   r?   accprevjrC   r   r   r   rm     s    
4 rm   )cacher+   c                 C   s  | tj}| tj}t|}|d }tj|jd | jd}tj|jd t|ftjd}tj|jd |f| jd}t	
|jd D ]}|| ||d   }}	t| ||	 ||< |	| |kr| ||	 ||d |	| f< n:|	| |krt| ||	  |d |  ||d d f< t||d d f || ||d d f< q|d d d d df d d d |d f }tj|jd | jd}
d}t|jD ]B}|
|d d ||| f jdd7 }
|
|d d |f< || }q|||jd df S )NrW   r   rU   r   r0   )Zastyper7   Zint64r[   r`   ra   rV   rn   rY   rb   rc   r:   rd   rZ   Zreshape)rP   rQ   rh   Zmaxidxr]   r?   r^   rB   re   rf   ro   rp   rq   r   r   r   rl     s.    
,**$rl   )FN)0typingr   r   r   r   warningsr   rb   numpyr7   Zpandasr4   Zscipy.sparser   r   r	   r
   r   Zsklearn.utils.sparsefuncsr   Zanndatar   Z_docsr   r   r   r   r   r   _utilsr   r    strintboolr5   rD   rK   rL   rR   rT   rS   rO   Zndarrayr<   rm   Znjitrl   r   r   r   r   <module>   s    


_	H
g  