U
    md                     @   s  d dl mZmZmZmZ d dlmZ d dlZd dl	m
Z
 d dlmZ d dlmZ zd dlmZ W n" ek
r   G dd	 d	ZY nX d d
lmZ d dlmZ d dlmZ d dlmZmZ dddZde
ee eeee  ee  eed ee  f ee  eeeee ej!f  dddZ"dS )    )OptionalUnionIterableDict)warnN)AnnData)issparse)sparsefuncs)Arrayc                   @   s   e Zd ZdS )	DaskArrayN)__name__
__module____qualname__ r   r   \/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.pyr      s   r   )logging)Literal)view_to_actual)_get_obs_rep_set_obs_repFc                 C   s   |r|   n| } t| jjttjfr0| tj} t	|t
rL||dk  }n||dk }|d krntj|ddn|}||dk7 }|| }t| rt| d|  nBt	|tjrtj| |d d d f | d nt| |d d d f } | S )Nr   )Zaxis   )out)copy
issubclassZdtypetypeintnpintegerZastypeZfloat32
isinstancer   Zcompute_chunk_sizesmedianr   r	   Zinplace_row_scalendarraydivide)Xcountsafterr   Zcounts_greater_than_zeror   r   r   _normalize_data   s    
r%   皙?Tall)adata
target_sumexclude_highly_expressedmax_fraction	key_addedlayerlayers
layer_norminplacer   returnc
                 C   s`  |	r|st d|  } |dk s(|dkr0t d|dk	rDttd |dk	rXttd |dkrl| j }nt|trt d	|t|  t	| |d
}
d}d}|r|

d}t|}|
|dddf | k
d}t|dk}|d| j|    7 }|
dd|f 
d}n
|

d}t|}t|}|dk}t|sVttd |r|dk	rp|| j|< t| t|
|||d
 ntt|
||dd|d}|dkr|}n2|dkrt|| }n|dkrd}nt d|dk	r|ndD ](}t| |||d}|s|d ||< qtjd|d |dk	rHtd|d |	rR| S |s\|S dS )u      Normalize counts per cell.

    Normalize each cell by total counts over all genes,
    so that every cell has the same total count after normalization.
    If choosing `target_sum=1e6`, this is CPM normalization.

    If `exclude_highly_expressed=True`, very highly expressed genes are excluded
    from the computation of the normalization factor (size factor) for each
    cell. This is meaningful as these can strongly influence the resulting
    normalized values for all other genes [Weinreb17]_.

    Similar functions are used, for example, by Seurat [Satija15]_, Cell Ranger
    [Zheng17]_ or SPRING [Weinreb17]_.

    Params
    ------
    adata
        The annotated data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    target_sum
        If `None`, after normalization, each observation (cell) has a total
        count equal to the median of total counts for observations (cells)
        before normalization.
    exclude_highly_expressed
        Exclude (very) highly expressed genes for the computation of the
        normalization factor (size factor) for each cell. A gene is considered
        highly expressed, if it has more than `max_fraction` of the total counts
        in at least one cell. The not-excluded genes will sum up to
        `target_sum`.
    max_fraction
        If `exclude_highly_expressed=True`, consider cells as highly expressed
        that have more counts than `max_fraction` of the original total counts
        in at least one cell.
    key_added
        Name of the field in `adata.obs` where the normalization factor is
        stored.
    layer
        Layer to normalize instead of `X`. If `None`, `X` is normalized.
    inplace
        Whether to update `adata` or return dictionary with normalized copies of
        `adata.X` and `adata.layers`.
    copy
        Whether to modify copied input object. Not compatible with inplace=False.

    Returns
    -------
    Returns dictionary with normalized copies of `adata.X` and `adata.layers`
    or updates `adata` with normalized version of the original
    `adata.X` and `adata.layers`, depending on `inplace`.

    Example
    --------
    >>> from anndata import AnnData
    >>> import scanpy as sc
    >>> sc.settings.verbosity = 2
    >>> np.set_printoptions(precision=2)
    >>> adata = AnnData(np.array([
    ...    [3, 3, 3, 6, 6],
    ...    [1, 1, 1, 2, 2],
    ...    [1, 22, 1, 2, 2],
    ... ]))
    >>> adata.X
    array([[ 3.,  3.,  3.,  6.,  6.],
           [ 1.,  1.,  1.,  2.,  2.],
           [ 1., 22.,  1.,  2.,  2.]], dtype=float32)
    >>> X_norm = sc.pp.normalize_total(adata, target_sum=1, inplace=False)['X']
    >>> X_norm
    array([[0.14, 0.14, 0.14, 0.29, 0.29],
           [0.14, 0.14, 0.14, 0.29, 0.29],
           [0.04, 0.79, 0.04, 0.07, 0.07]], dtype=float32)
    >>> X_norm = sc.pp.normalize_total(
    ...     adata, target_sum=1, exclude_highly_expressed=True,
    ...     max_fraction=0.2, inplace=False
    ... )['X']
    The following highly-expressed genes are not considered during normalization factor computation:
    ['1', '3', '4']
    >>> X_norm
    array([[ 0.5,  0.5,  0.5,  1. ,  1. ],
           [ 0.5,  0.5,  0.5,  1. ,  1. ],
           [ 0.5, 11. ,  0.5,  1. ,  1. ]], dtype=float32)
    z0`copy=True` cannot be used with `inplace=False`.r   r   z$Choose max_fraction between 0 and 1.NzbThe `layers` argument is deprecated. Instead, specify individual layers to normalize with `layer`.zcThe `layer_norm` argument is deprecated. Specify the target size factor directly with `target_sum`.r'   z5`layers` needs to be a list of strings or 'all', not )r-   znormalizing counts per cellzb The following highly-expressed genes are not considered during normalization factor computation:
zSome cells have zero countsT)r   )r"   Znorm_factorr$   r"   z)layer_norm should be "after", "X" or Noner   )r-   r)   r0   z    finished ({time_passed}))timez
and added z2, counts per cell before normalization (adata.obs))
ValueErrorr   r   FutureWarningr.   keysr   strr   r   sumr   ZravelZ	var_namestolistlogginfor'   UserWarningZobsr   r%   dictr   normalize_totaldebug)r(   r)   r*   r+   r,   r-   r.   r/   r0   r   r"   Zgene_subsetmsgZcounts_per_cellstartZcell_subsetZdatr$   Zlayer_to_normresr   r   r   r=   -   s    ^







 
 


   

r=   )NF)	NFr&   NNNNTF)#typingr   r   r   r   warningsr   numpyr   Zanndatar   Zscipy.sparser   Zsklearn.utilsr	   Z
dask.arrayr
   r   ImportErrorZscanpyr   r9   Zscanpy._compatr   Zscanpy._utilsr   Z
scanpy.getr   r   r%   floatboolr6   r    r=   r   r   r   r   <module>   sH   
         