U
    md,                     @   s   d Z ddlmZmZmZ ddlmZ ddlmZ ddl	m
Z ddlmZ ddlmZ dd	lmZ d
Zdddddddddddddeeed ee df eee ee eed ef ee ed eeee eee ee dddZdS )z*Denoise high-dimensional data using MAGIC
    )UnionSequenceOptional)AnnData)version   )logging)settings)Literal)	AnyRandomz2.0N      d   exactZ	euclideanF)knndecayknn_maxtn_pcasolverknn_distrandom_staten_jobsverbosecopy)	all_genespca_onlyauto)r   Zapproximate)adata	name_listr   r   r   r   r   r   r   r   r   r   r   returnc                K   sh  zddl m}m} W n tk
r0   tdY n,X t|ttks\tdt d| dtd}t	|t
tdf}|r|d	krtd
|dkr| }n|s|std| d|r|  n| } |
dkrtjn|
}
|f ||||||||	|
|d
|j| |d}tjd||dkrdndd |dkr:|j| jd< n |rL| |_|} n| | _|j| _|rd| S dS )a      Markov Affinity-based Graph Imputation of Cells (MAGIC) API [vanDijk18]_.

    MAGIC is an algorithm for denoising and transcript recover of single cells
    applied to single-cell sequencing data. MAGIC builds a graph from the data
    and uses diffusion to smooth out noise and recover the data manifold.

    The algorithm implemented here has changed primarily in two ways
    compared to the algorithm described in [vanDijk18]_. Firstly, we use
    the adaptive kernel described in Moon et al, 2019 [Moon17]_ for
    improved stability. Secondly, data diffusion is applied
    in the PCA space, rather than the data space, for speed and
    memory improvements.

    More information and bug reports
    `here <https://github.com/KrishnaswamyLab/MAGIC>`__. For help, visit
    <https://krishnaswamylab.org/get-help>.

    Parameters
    ----------
    adata
        An anndata file with `.raw` attribute representing raw counts.
    name_list
        Denoised genes to return. The default `'all_genes'`/`None`
        may require a large amount of memory if the input data is sparse.
        Another possibility is `'pca_only'`.
    knn
        number of nearest neighbors on which to build kernel.
    decay
        sets decay rate of kernel tails.
        If None, alpha decaying kernel is not used.
    knn_max
        maximum number of nearest neighbors with nonzero connection.
        If `None`, will be set to 3 * `knn`.
    t
        power to which the diffusion operator is powered.
        This sets the level of diffusion. If 'auto', t is selected
        according to the Procrustes disparity of the diffused data.
    n_pca
        Number of principal components to use for calculating
        neighborhoods. For extremely large datasets, using
        n_pca < 20 allows neighborhoods to be calculated in
        roughly log(n_samples) time. If `None`, no PCA is performed.
    solver
        Which solver to use. "exact" uses the implementation described
        in van Dijk et al. (2018) [vanDijk18]_. "approximate" uses a faster
        implementation that performs imputation in the PCA space and then
        projects back to the gene space. Note, the "approximate" solver may
        return negative values.
    knn_dist
        recommended values: 'euclidean', 'cosine', 'precomputed'
        Any metric from `scipy.spatial.distance` can be used
        distance metric for building kNN graph. If 'precomputed',
        `data` should be an n_samples x n_samples distance or
        affinity matrix.
    random_state
        Random seed. Defaults to the global `numpy` random number generator.
    n_jobs
        Number of threads to use in training. All cores are used by default.
    verbose
        If `True` or an integer `>= 2`, print status messages.
        If `None`, `sc.settings.verbosity` is used.
    copy
        If true, a copy of anndata is returned. If `None`, `copy` is True if
        `genes` is not `'all_genes'` or `'pca_only'`. `copy` may only be False
        if `genes` is `'all_genes'` or `'pca_only'`, as the resultant data
        will otherwise have different column names from the input data.
    kwargs
        Additional arguments to `magic.MAGIC`.

    Returns
    -------
    If `copy` is True, AnnData object is returned.

    If `subset_genes` is not `all_genes`, PCA on MAGIC values of cells are
    stored in `adata.obsm['X_magic']` and `adata.X` is not modified.

    The raw counts are stored in `.raw` attribute of AnnData object.

    Examples
    --------
    >>> import scanpy as sc
    >>> import scanpy.external as sce
    >>> adata = sc.datasets.paul15()
    >>> sc.pp.normalize_per_cell(adata)
    >>> sc.pp.sqrt(adata)  # or sc.pp.log1p(adata)
    >>> adata_magic = sce.pp.magic(adata, name_list=['Mpo', 'Klf1', 'Ifitm1'], knn=5)
    >>> adata_magic.shape
    (2730, 3)
    >>> sce.pp.magic(adata, name_list='pca_only', knn=5)
    >>> adata.obsm['X_magic'].shape
    (2730, 100)
    >>> sce.pp.magic(adata, name_list='all_genes', knn=5)
    >>> adata.X.shape
    (2730, 3451)
    r   )MAGIC__version__zxPlease install magic package via `pip install --user git+git://github.com/KrishnaswamyLab/MAGIC.git#subdirectory=python`z!scanpy requires magic-impute >= vz (detected: vzN). Please update magic package via `pip install --user --upgrade magic-impute`zcomputing MAGICN>   Nr   r   zVInvalid string value for `name_list`: Only `'all_genes'` and `'pca_only'` are allowed.z]Can only perform MAGIC in-place with `name_list=='all_genes' or `name_list=='pca_only'` (got z). Consider setting `copy=True`)
r   r   r   r   r   r   r   r   r   r   )Zgenesz    finishedr   z:added
    'X_magic', PCA on MAGIC coordinates (adata.obsm) )timedeepX_magic)magicr!   r"   ImportErrorr   parseMIN_VERSIONlogginfo
isinstancestrtype
ValueErrorr   r	   r   Zfit_transformXZobsmraw)r   r   r   r   r   r   r   r   r   r   r   r   r   kwargsr!   r"   startZ
all_or_pcar&    r5   R/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/external/pp/_magic.pyr'      sr    r


 

r'   )N)__doc__typingr   r   r   Zanndatar   	packagingr   r#   r   r+   Z	_settingsr	   Z_compatr
   _utilsr   r*   r.   intfloatboolr'   r5   r5   r5   r6   <module>   sJ    