U
    mdɎ                     @   s  d Z ddlmZ ddlmZ ddlZddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlZddlmZmZmZmZ ddlmZmZ ddlmZ dd	lmZ d
dlmZ  d
dl!m"Z# d
dl$m%Z%m&Z&m'Z'm(Z(m)Z) d
dl*m+Z+ d
dl,m-Z-m.Z. ddl/m0Z0 ddl$m1Z1 zddl2m3Z4 W n e5k
r.   dZ4Y nX ddl6m7Z7 dVeee8 ee8 ee8 ee8 e9e9ee	ej:ej:f  dddZ;dWeee8 ee8 ee8 ee8 e9e9eede	ej:ej:f f dddZ<edddddddeeej:ef ee e9e9ee8 ee= ee= dddZ>e>?eddd ee e9d d!d"Z@e>?ej:ddd ee e9d d#d$ZAe>?edddddddee e9e9ee8 ee= ee= ee d%d&d'ZBdXee9e9ee8 ee d(d)d*ZCdYeeej:ef eeD eej: e=e9ee+d- ee= f ee+d.  e8ee d/	d0d1ZEdZeee=ee= f ee8 e9ee d2d3d4ZFd5d6 ZGed[eeeej:f e9eeD e9ee= ee= d7d8d9ZHeH?ej:ddddd:e9eeD e9e9d:d;d<ZIeH?eddddd:e9eeD e9e9d:d=d>ZJeH?edddddd?ee9eeD e9ee= ee= ee d@dAdBZKd\eeej:ef eeD ee8 e(e9ee dCdDdEZLe&dFdGid]ddddHeeee8e
e8 f  ee8 e(e9e9ee dIdJdKZMdLdM ZNdNdO ZOejPddPd^ej:e8e(e9e9dQdRdSZQd_dTdUZRdS )`zdSimple Preprocessing Functions

Compositions of these functions are found in sc.preprocess.recipes.
    )singledispatch)NumberN)UnionOptionalTuple
CollectionSequenceIterable)issparseisspmatrix_csr
csr_matrixspmatrix)sparsefuncscheck_array)is_categorical_dtype)AnnData   )logging)settings)sanitize_anndatadeprecated_arg_namesview_to_actual	AnyRandom_check_array_function_arguments)Literal)_get_obs_rep_set_obs_rep   )materialize_as_ndarray)_get_mean_var)filter_genes_dispersionTF)data
min_counts	min_genes
max_counts	max_genesinplacecopyreturnc                 C   s  |rt d tdd ||||fD }|dkr8tdt| tr|rN|  n| }tt|j	||||\}	}
|sx|	|
fS |dkr|dkr|
|j
d< n
|
|j
d< ||	 |r|S dS | }|dkr|n|}|dkr|n|}tj|dkr|dkr|n|d	kdd
}t|r|j}|dk	r ||k}	|dk	r2||k}	t|	 }|d	krd| d}|dk	sh|dk	r|d7 }||dkr| dn| d7 }|dk	s|dk	r|d7 }||dkr| dn| d7 }t | |	|fS )u      Filter cell outliers based on counts and numbers of genes expressed.

    For instance, only keep cells with at least `min_counts` counts or
    `min_genes` genes expressed. This is to filter measurement outliers,
    i.e. “unreliable” observations.

    Only provide one of the optional parameters `min_counts`, `min_genes`,
    `max_counts`, `max_genes` per call.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    min_counts
        Minimum number of counts required for a cell to pass filtering.
    min_genes
        Minimum number of genes expressed required for a cell to pass filtering.
    max_counts
        Maximum number of counts required for a cell to pass filtering.
    max_genes
        Maximum number of genes expressed required for a cell to pass filtering.
    inplace
        Perform computation inplace or return result.

    Returns
    -------
    Depending on `inplace`, returns the following arrays or directly subsets
    and annotates the data matrix:

    cells_subset
        Boolean index mask that does filtering. `True` means that the
        cell is kept. `False` means the cell is removed.
    number_per_cell
        Depending on what was tresholded (`counts` or `genes`),
        the array stores `n_counts` or `n_cells` per gene.

    Examples
    --------
    >>> import scanpy as sc
    >>> adata = sc.datasets.krumsiek11()
    >>> adata.n_obs
    640
    >>> adata.var_names
    ['Gata2' 'Gata1' 'Fog1' 'EKLF' 'Fli1' 'SCL' 'Cebpa'
     'Pu.1' 'cJun' 'EgrNab' 'Gfi1']
    >>> # add some true zeros
    >>> adata.X[adata.X < 0.3] = 0
    >>> # simply compute the number of genes per cell
    >>> sc.pp.filter_cells(adata, min_genes=0)
    >>> adata.n_obs
    640
    >>> adata.obs['n_genes'].min()
    1
    >>> # filter manually
    >>> adata_copy = adata[adata.obs['n_genes'] >= 3]
    >>> adata_copy.obs['n_genes'].min()
    >>> adata.n_obs
    554
    >>> adata.obs['n_genes'].min()
    3
    >>> # actually do some filtering
    >>> sc.pp.filter_cells(adata, min_genes=3)
    >>> adata.n_obs
    554
    >>> adata.obs['n_genes'].min()
    3
    ,`copy` is deprecated, use `inplace` instead.c                 s   s   | ]}|d k	V  qd S N .0optionr+   r+   U/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/preprocessing/_simple.py	<genexpr>z   s    zfilter_cells.<locals>.<genexpr>r   zjOnly provide one of the optional parameters `min_counts`, `min_genes`, `max_counts`, `max_genes` per call.Nn_countsZn_genesr   Zaxisfiltered out z cells that have z
less than z genes expressed countsz
more than )loggwarningsum
ValueError
isinstancer   r'   r   filter_cellsXobs_inplace_subset_obsnpr
   A1info)r!   r"   r#   r$   r%   r&   r'   n_given_optionsadatacell_subsetnumberr;   
min_number
max_numberZnumber_per_cellsmsgr+   r+   r/   r:   *   sj    N




 




r:   )r!   r"   	min_cellsr$   	max_cellsr&   r'   r(   c                 C   s  |rt d tdd ||||fD }|dkr8tdt| tr|rN|  n| }tt|j	||||d\}	}
|sz|	|
fS |dkr|dkr|
|j
d< n
|
|j
d	< ||	 |r|S dS | }|dkr|n|}|dkr|n|}tj|dkr|dkr|n|d
kd
d}t|r|j}|dk	r"||k}	|dk	r4||k}	t|	 }|d
krd| d}|dk	sj|dk	r|d7 }||dkr| dn| d7 }|dk	s|dk	r|d7 }||dkr| dn| d7 }t | |	|fS )ua      Filter genes based on number of cells or counts.

    Keep genes that have at least `min_counts` counts or are expressed in at
    least `min_cells` cells or have at most `max_counts` counts or are expressed
    in at most `max_cells` cells.

    Only provide one of the optional parameters `min_counts`, `min_cells`,
    `max_counts`, `max_cells` per call.

    Parameters
    ----------
    data
        An annotated data matrix of shape `n_obs` × `n_vars`. Rows correspond
        to cells and columns to genes.
    min_counts
        Minimum number of counts required for a gene to pass filtering.
    min_cells
        Minimum number of cells expressed required for a gene to pass filtering.
    max_counts
        Maximum number of counts required for a gene to pass filtering.
    max_cells
        Maximum number of cells expressed required for a gene to pass filtering.
    inplace
        Perform computation inplace or return result.

    Returns
    -------
    Depending on `inplace`, returns the following arrays or directly subsets
    and annotates the data matrix

    gene_subset
        Boolean index mask that does filtering. `True` means that the
        gene is kept. `False` means the gene is removed.
    number_per_gene
        Depending on what was tresholded (`counts` or `cells`), the array stores
        `n_counts` or `n_cells` per gene.
    r)   c                 s   s   | ]}|d k	V  qd S r*   r+   r,   r+   r+   r/   r0      s    zfilter_genes.<locals>.<genexpr>r   zjOnly provide one of the optional parameters `min_counts`, `min_cells`, `max_counts`, `max_cells` per call.)rI   r"   rJ   r$   Nr1   Zn_cellsr   r2   r3   z genes that are detected zin less than z cellsr4   zin more than )r5   r6   r7   r8   r9   r   r'   r   filter_genesr;   varZ_inplace_subset_varr>   r
   r?   r@   )r!   r"   rI   r$   rJ   r&   r'   rA   rB   Zgene_subsetrD   r;   rE   rF   Znumber_per_generG   rH   r+   r+   r/   rK      sn    /


	

 




rK   )baser'   chunked
chunk_sizelayerobsmr;   rM   r'   rN   rO   rP   rQ   c                C   s   t ||||d t| ||dS )uj      Logarithmize the data matrix.

    Computes :math:`X = \log(X + 1)`,
    where :math:`log` denotes the natural logarithm unless a different base is given.

    Parameters
    ----------
    X
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    base
        Base of the logarithm. Natural logarithm is used by default.
    copy
        If an :class:`~anndata.AnnData` is passed, determines whether a copy
        is returned.
    chunked
        Process the data matrix in chunks, which will save memory.
        Applies only to :class:`~anndata.AnnData`.
    chunk_size
        `n_obs` of the chunks to process the data in.
    layer
        Entry of layers to tranform.
    obsm
        Entry of obsm to transform.

    Returns
    -------
    Returns or updates `data`, depending on `copy`.
    )rN   rO   rP   rQ   r'   rM   )r   log1p_arrayrR   r+   r+   r/   log1p  s    )   rU   rM   r'   c                C   s.   t | dtjtjf|d} t| jd|d| _| S )N)ZcsrZcsc)Zaccept_sparsedtyper'   FrS   )r   r>   Zfloat64float32rU   r!   r;   rM   r'   r+   r+   r/   log1p_sparseL  s      
 rZ   c                C   s   |r*t | jt js | t} qR|  } n(t | jt jsRt | jtsR| t} t j| | d |d k	r~t j	| t 
|| d | S )N)out)r>   
issubdtyperW   Zfloatingastypefloatr'   complexrU   dividelogrY   r+   r+   r/   rT   U  s    

rT   )rM   r'   rN   rO   rP   rQ   r(   c                C   s   d|   krtd |r"|  n| } t|  |rz|d k	sB|d k	rJtd| |D ]"\}}}	t||dd| j||	< qTn,t	| ||d}
t|
d|d}
t
| |
||d d|i| jd< |r| S d S )	NrU   z,adata.X seems to be already log-transformed.zFCurrently cannot perform chunked operations on arrays not stored in X.FrV   rP   rQ   rS   rM   )Zuns_keysr5   r6   r'   r   NotImplementedError	chunked_XrU   r;   r   r   Zuns)rB   rM   r'   rN   rO   rP   rQ   chunkstartendr;   r+   r+   r/   log1p_anndataf  s"    
rh   )r!   r'   rN   rO   r(   c           	      C   s   t | tr`|r|  n| }|rH||D ]\}}}t||j||< q(nt| j|_|r\|S dS | }t|svt|S | S dS )up      Square root the data matrix.

    Computes :math:`X = \sqrt(X)`.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    copy
        If an :class:`~anndata.AnnData` object is passed,
        determines whether a copy is returned.
    chunked
        Process the data matrix in chunks, which will save memory.
        Applies only to :class:`~anndata.AnnData`.
    chunk_size
        `n_obs` of the chunks to process the data in.

    Returns
    -------
    Returns or updates `data`, depending on `copy`.
    N)r9   r   r'   rd   sqrtr;   r
   r>   )	r!   r'   rN   rO   rB   re   rf   rg   r;   r+   r+   r/   ri     s    

ri   r1   r+   all)afterr;   )	r!   counts_per_cell_aftercounts_per_cellkey_n_countsr'   layersuse_repr"   r(   c              	   C   s  t | tr$td}|r"|  n| }	|dkr`tt|	j|d\}
}||	j|< |		|
 ||
 }t
|	j|| |dkr|	j n|}|dkr|}n.|dkrt||
 }n|dkrd}ntd|D ]:}t|	j| |d\}}t
|	j| ||dd	}||	j|< qtjd
|d|d |r |	S dS |r2|  n| }|dkrn|sNtdt||d\}
}||
 }||
 }|dkrt|}t Z td ||dk7 }|| }t|s|t|ddtjf  }nt|d|  W 5 Q R X |r|S dS )u      Normalize total counts per cell.

    .. warning::
        .. deprecated:: 1.3.7
            Use :func:`~scanpy.pp.normalize_total` instead.
            The new function is equivalent to the present
            function, except that

            * the new function doesn't filter cells based on `min_counts`,
              use :func:`~scanpy.pp.filter_cells` if filtering is needed.
            * some arguments were renamed
            * `copy` is replaced by `inplace`

    Normalize each cell by total counts over all genes, so that every cell has
    the same total count after normalization.

    Similar functions are used, for example, by Seurat [Satija15]_, Cell Ranger
    [Zheng17]_ or SPRING [Weinreb17]_.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond
        to cells and columns to genes.
    counts_per_cell_after
        If `None`, after normalization, each cell has a total count equal
        to the median of the *counts_per_cell* before normalization.
    counts_per_cell
        Precomputed counts per cell.
    key_n_counts
        Name of the field in `adata.obs` where the total counts per cell are
        stored.
    copy
        If an :class:`~anndata.AnnData` is passed, determines whether a copy
        is returned.
    min_counts
        Cells with counts less than `min_counts` are filtered out during
        normalization.

    Returns
    -------
    Returns or updates `adata` with normalized version of the original
    `adata.X`, depending on `copy`.

    Examples
    --------
    >>> import scanpy as sc
    >>> adata = AnnData(np.array([[1, 0], [3, 0], [5, 6]]))
    >>> print(adata.X.sum(axis=1))
    [  1.   3.  11.]
    >>> sc.pp.normalize_per_cell(adata)
    >>> print(adata.obs)
    >>> print(adata.X.sum(axis=1))
       n_counts
    0       1.0
    1       3.0
    2      11.0
    [ 3.  3.  3.]
    >>> sc.pp.normalize_per_cell(
    >>>     adata, counts_per_cell_after=1,
    >>>     key_n_counts='n_counts2',
    >>> )
    >>> print(adata.obs)
    >>> print(adata.X.sum(axis=1))
       n_counts  n_counts2
    0       1.0        3.0
    1       3.0        3.0
    2      11.0        3.0
    [ 1.  1.  1.]
    z#normalizing by total count per cellN)r"   rj   rk   r;   z&use_rep should be "after", "X" or NoneTr'   z>    finished ({time_passed}): normalized adata.X and added    z2, counts per cell before normalization (adata.obs)timezCan only be run with copy=Trueignorer   r   )r9   r   r5   r@   r'   r   r:   r;   r<   r=   normalize_per_cellro   keysr>   Zmedianr8   warningscatch_warningssimplefilterr
   Znewaxisr   Zinplace_row_scale)r!   rl   rm   rn   r'   ro   rp   r"   rf   rB   rC   rk   rP   Zsubsetcountstempr;   r+   r+   r/   ru     sZ    Q









ru   )rB   rv   n_jobsr'   r(   c                    sp  t d| }t| jr$t d |r0|  n| } t|  | jrP| |   t|t	r`|g}t| jrv| j
 | _|dkrtjn|}d}|d |  kr@t| j|d  r@t|dkrtdt d tj| jjd	d
}| j|d  jjD ]D}|| j|d  kj}t| jjD ]\}	}
|
|  |||	f< qqd}n*|rR| j| }n
| j }|ddd ttd| jjd | t }t| jjd | t }g }tj!| j|dd}|rtj!||dd}t|D ]2\}}|r|| }n|}|"t#|||f qddl$m%}m&  ||d fdd|D }t'|j| jj(| _t jd|d |rl| S dS )a      Regress out (mostly) unwanted sources of variation.

    Uses simple linear regression. This is inspired by Seurat's `regressOut`
    function in R [Satija15]. Note that this function tends to overcorrect
    in certain circumstances as described in :issue:`526`.

    Parameters
    ----------
    adata
        The annotated data matrix.
    keys
        Keys for observation annotation on which to regress on.
    n_jobs
        Number of jobs for parallel computation.
        `None` means using :attr:`scanpy._settings.ScanpyConfig.n_jobs`.
    copy
        Determines whether a copy of `adata` is returned.

    Returns
    -------
    Depending on `copy` returns or updates `adata` with the corrected data matrix.
    zregressing out z=    sparse input is densified and may lead to high memory useNFr   r   zwIf providing categorical variable, only a single one is allowed. For this one we regress on the mean for each category.z2... regressing on per-gene means within categoriesrX   )rW   Tonesg      ?i  r2   )Paralleldelayed)r|   c                 3   s   | ]} t |V  qd S r*   )_regress_out_chunk)r-   taskr   r+   r/   r0     s     zregress_out.<locals>.<genexpr>z    finishedrr   ))r5   r@   r
   r;   r'   r   Zis_viewZ_init_as_actualr9   strtoarraysettr|   Zobs_keysr   r<   lenr8   debugr>   zerosshapecat
categoriesvalues	enumerateTmeaninsertceilminr]   intZarray_splitappendtupleZjoblibr~   r   vstackrW   )rB   rv   r|   r'   rf   variable_is_categorical
regressorscategorymaskZixxZ	len_chunkZn_chunkstasksZ
chunk_listZregressors_chunkidx
data_chunkregresr~   resr+   r   r/   regress_out:  sZ    



&

"
r   c              	   C   s&  | d }| d }| d }g }dd l m} ddlm} t|jd D ]}|d d |f |d|f k s~||d d |f  qB|rtj	t
|jd |d d |f f }n|}z0|j|d d |f ||j d }	|	j}
W n0 |k
r   td t|jd }
Y nX ||
 qBt|S )Nr   r   r   )PerfectSeparationError)familyz9Encountered PerfectSeparationError, setting to 0 as in R.)Zstatsmodels.apiapiZstatsmodels.tools.sm_exceptionsr   ranger   anyr   r>   Zc_r}   ZGLMZfamiliesZGaussianfitZresid_responser5   r6   r   r   )r!   r   r   r   Zresponses_chunk_listsmr   Z	col_indexr   resultZ
new_columnr+   r+   r/   r     s2     (  


r   r;   zero_center	max_valuer'   rP   rQ   c                 C   sP   t ||d |dk	r&tdt|  |dk	r@tdt|  t| |||dS )uQ      Scale data to unit variance and zero mean.

    .. note::
        Variables (genes) that do not display any variation (are constant across
        all observations) are retained and (for zero_center==True) set to 0
        during this operation. In the future, they might be set to NaNs.

    Parameters
    ----------
    X
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    zero_center
        If `False`, omit zero-centering variables, which allows to handle sparse
        input efficiently.
    max_value
        Clip (truncate) to this value after scaling. If `None`, do not clip.
    copy
        Whether this function should be performed inplace. If an AnnData object
        is passed, this also determines if a copy is returned.
    layer
        If provided, which element of layers to scale.
    obsm
        If provided, which element of obsm to scale.

    Returns
    -------
    Depending on `copy` returns or updates `adata` with a scaled `adata.X`,
    annotated with `'mean'` and `'std'` in `adata.var`.
    rb   Nz1`layer` argument inappropriate for value of type z0`obsm` argument inappropriate for value of type )r   r   r'   )r   r8   typescale_arrayr   r+   r+   r/   scale  s    (r   r   r   r'   return_mean_stdc                C   s   |r|   } |s"|d k	r"td t| jtjrFtd | t} t	| \}}t
|}d||dk< t| r|r|tdt| d|  n|r| |8 } | | } |d k	rtd|  || | |k< |r| ||fS | S d S )Nz<... be careful when using `max_value` without `zero_center`.zV... as scaling leads to float results, integer input is cast to float, returning copy.r   r   z!Cannot zero-center sparse matrix.z... clipping at max_value )r'   r5   r@   r>   r\   rW   integerr]   r^   r   ri   r
   r8   r   Zinplace_column_scaler   )r;   r   r   r'   r   r   rL   stdr+   r+   r/   r     s6    	


r   c                C   s,   |rt d |  } d}t| ||||dS )Nz]... as `zero_center=True`, sparse input is densified and may lead to large memory consumptionF)r   r'   r   r   )r5   r@   r   r   )r;   r   r   r'   r   r+   r+   r/   scale_sparse&  s    
r   )r   r   r'   rP   rQ   )rB   r   r   r'   rP   rQ   r(   c                C   sf   |r|   n| } t|  t| ||d}t|||ddd\}| jd< | jd< t| |||d |rb| S d S )Nrb   FTr   r   r   )r'   r   r   r   rL   r   )rB   r   r   r'   rP   rQ   r;   r+   r+   r/   scale_anndata@  s    
r   )r!   fractionn_obsrandom_stater'   r(   c           	      C   s   t j| t| tr| jn| jd }|dk	r4|}nN|dk	rz|dksL|dk rZtd| t|| }t	
d| d ntdt jj||dd	}t| tr|r| |  S | | n| }|| |fS dS )
u      Subsample to a fraction of the number of observations.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    fraction
        Subsample to this `fraction` of the number of observations.
    n_obs
        Subsample to this number of observations.
    random_state
        Random seed to change subsampling.
    copy
        If an :class:`~anndata.AnnData` is passed,
        determines whether a copy is returned.

    Returns
    -------
    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
    returns a subsampled copy of it (`copy == True`).
    r   Nr   z*`fraction` needs to be within [0, 1], not z... subsampled to z data pointsz"Either pass `n_obs` or `fraction`.F)sizereplace)r>   randomseedr9   r   r   r   r8   r   r5   r   choicer'   r=   )	r!   r   r   r   r'   Z	old_n_obsZ	new_n_obsZobs_indicesr;   r+   r+   r/   	subsampleY  s"    
r   Ztarget_countsrm   )r   r   r'   )rB   rm   total_countsr   r   r'   r(   c                C   sf   |dk	}|dk	}||kr t d|r,|  } |rDt| j|||| _n|rZt| j|||| _|rb| S dS )a      Downsample counts from count matrix.

    If `counts_per_cell` is specified, each cell will downsampled.
    If `total_counts` is specified, expression matrix will be downsampled to
    contain at most `total_counts`.

    Parameters
    ----------
    adata
        Annotated data matrix.
    counts_per_cell
        Target total counts per cell. If a cell has more than 'counts_per_cell',
        it will be downsampled to this number. Resulting counts can be specified
        on a per cell basis by passing an array.Should be an integer or integer
        ndarray with same length as number of obs.
    total_counts
        Target total counts. If the count matrix has more than `total_counts`
        it will be downsampled to have this number.
    random_state
        Random seed for subsampling.
    replace
        Whether to sample the counts with replacement.
    copy
        Determines whether a copy of `adata` is returned.

    Returns
    -------
    Depending on `copy` returns or updates an `adata` with downsampled `.X`.
    Nz@Must specify exactly one of `total_counts` or `counts_per_cell`.)r8   r'   _downsample_total_countsr;   _downsample_per_cell)rB   rm   r   r   r   r'   Ztotal_counts_callZcounts_per_cell_callr+   r+   r/   downsample_counts  s    )r   c                 C   sT  | j d }t|tr"t||}n
t|}|jtjdd}t|tjrTt	||kr\t
dt| rt| }t| s|t| } t| jdd}t||kd }t| j| jdd }|D ]"}	||	 }
t|
||	 ||dd	 q|   |tk	r|| } nVt| jdd}t||kd }|D ],}	| |	d d f }
t|
||	 ||dd	 q"| S )
Nr   Frq   zIf provided, 'counts_per_cell' must be either an integer, or coercible to an `np.ndarray` of length as number of observations by `np.asarray(counts_per_cell)`.r   r2   Tr   r   r&   )r   r9   r   r>   fullZasarrayr]   int_ndarrayr   r8   r
   r   r   r   Zravelr7   Znonzerosplitr!   Zindptr_downsample_arrayeliminate_zeros)r;   rm   r   r   r   original_typeZtotalsZunder_targetrowsZrowidxrowr+   r+   r/   r     sP    




r   c                 C   s   t |}|  }||k r| S t| rjt| }t| s<t| } t| j|||dd |   |tk	r|| } n$| 	t
j| j }t||||dd | S )NTr   )r   r&   )r   r7   r
   r   r   r   r   r!   r   Zreshaper>   multiplyr   )r;   r   r   r   totalr   vr+   r+   r/   r     s*    
r   )cache)coltargetr   r   r&   c           
      C   s   t j| |  }|r&d| dd< n
t | } t |d }t jj|||d}|  d}|D ]*}	|	|| krz|d7 }qd| |  d7  < q`| S )z    Evenly reduce counts in cell to target amount.

    This is an internal function and has some restrictions:

    * total counts in cell must be less than target
    r   Nr   )r   r   )r>   r   r   ZcumsumZ
zeros_liker   r   sort)
r   r   r   r   r&   Z	cumcountsr   sampleZgeneptrcountr+   r+   r/   r     s    

r   c                 C   s   | | j dd8 } tj| dd}tjjj||d\}}t|d d d }|d d |f }|| }|d d d |f }t|j	| j	j	S )Nr   r2   F)Zrowvar)kr   )
r   r>   ZcovspsparseZlinalgZeigshZargsortdotr   )r!   Zn_compsCZevalsZevecsZidcsr+   r+   r/   _pca_fallback5  s    r   )NNNNTF)NNNNTF)FFN)NNr1   Fr+   Nr   )NF)TNFNN)NNr   F)NN)r   TF)r   )S__doc__	functoolsr   numbersr   rw   typingr   r   r   r   r   r	   Znumbanumpyr>   Zscipyr   Zscipy.sparser
   r   r   r   Zsklearn.utilsr   r   Zpandas.api.typesr   Zanndatar    r   r5   Z	_settingsr   r   _utilsr   r   r   r   r   Z_compatr   getr   r   Z_distributedr   r   Z
dask.arrayarraydaImportErrorZ!_deprecated.highly_variable_genesr    r   boolr   r:   rK   r   rU   registerrZ   rT   rh   ri   r^   ru   r   r   r   r   r   r   r   r   r   r   Znjitr   r   r+   r+   r+   r/   <module>   s   
       
      l.
#   .       
 
  i%     /
-    5
  80
   $