U
    md!                     @   s   d Z ddlmZmZmZmZmZ ddlZddl	Z
ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ deeejef eed  eeeeeed eeeed eee  eeeee eeeeej eee f dddZdS )z$Perform clustering using PhenoGraph
    )UnionTupleOptionalTypeAnyN)AnnData)spmatrix   )Literal)MutableVertexPartition)logginglouvain   F
   T	euclideanMbP?  kdtree   r   Zleiden)r   Z	manhattanZcorrelationZcosine)r   Zbrute)adataclustering_algokdirectedprunemin_cluster_sizejaccardprimary_metricn_jobsq_tollouvain_time_limit	nn_methodpartition_typeresolution_parametern_iterationsuse_weightsseedcopykargsreturnc                 K   sN  t d}zddl}|jdks"tW n" tttfk
rF   tdY nX t| trz| j	d }W q t
k
r|   t
dY qX n| }d}|d	krd
|nd}d|rdnd}d|rdnd}|jf ||||||||||	|
||||||d|\}}}t jd|d |r|||fS || j|< |r:t|| j|< |rJ|| j|< dS )a:      PhenoGraph clustering [Levine15]_.

    **PhenoGraph** is a clustering method designed for high-dimensional single-cell
    data. It works by creating a graph ("network") representing phenotypic similarities
    between cells and then identifying communities in this graph. It supports both
    Louvain_ and Leiden_ algorithms for community detection.

    .. _Louvain: https://louvain-igraph.readthedocs.io/en/latest/

    .. _Leiden: https://leidenalg.readthedocs.io/en/latest/reference.html

    .. note::
       More information and bug reports `here
       <https://github.com/dpeerlab/PhenoGraph>`__.

    Parameters
    ----------
    adata
        AnnData, or Array of data to cluster, or sparse matrix of k-nearest neighbor
        graph. If ndarray, n-by-d array of n cells in d dimensions. if sparse matrix,
        n-by-n adjacency matrix.
    clustering_algo
        Choose between `'Louvain'` or `'Leiden'` algorithm for clustering.
    k
        Number of nearest neighbors to use in first step of graph construction.
    directed
        Whether to use a symmetric (default) or asymmetric (`'directed'`) graph.
        The graph construction process produces a directed graph, which is symmetrized
        by one of two methods (see `prune` below).
    prune
        `prune=False`, symmetrize by taking the average between the graph and its
        transpose. `prune=True`, symmetrize by taking the product between the graph
        and its transpose.
    min_cluster_size
        Cells that end up in a cluster smaller than min_cluster_size are considered
        outliers and are assigned to -1 in the cluster labels.
    jaccard
        If `True`, use Jaccard metric between k-neighborhoods to build graph. If
        `False`, use a Gaussian kernel.
    primary_metric
        Distance metric to define nearest neighbors. Note that performance will be
        slower for correlation and cosine.
    n_jobs
        Nearest Neighbors and Jaccard coefficients will be computed in parallel using
        n_jobs. If 1 is given, no parallelism is used. If set to -1, all CPUs are used.
        For n_jobs below -1, `n_cpus + 1 + n_jobs` are used.
    q_tol
        Tolerance, i.e. precision, for monitoring modularity optimization.
    louvain_time_limit
        Maximum number of seconds to run modularity optimization. If exceeded the best
        result so far is returned.
    nn_method
        Whether to use brute force or kdtree for nearest neighbor search.
        For very large high-dimensional data sets, brute force, with parallel
        computation, performs faster than kdtree.
    partition_type
        Defaults to :class:`~leidenalg.RBConfigurationVertexPartition`. For the
        available options, consult the documentation for
        :func:`~leidenalg.find_partition`.
    resolution_parameter
        A parameter value controlling the coarseness of the clustering in Leiden. Higher
        values lead to more clusters. Set to `None` if overriding `partition_type` to
        one that does not accept a `resolution_parameter`.
    n_iterations
        Number of iterations to run the Leiden algorithm. If the number of iterations is
        negative, the Leiden algorithm is run until an iteration in which there was no
        improvement.
    use_weights
        Use vertices in the Leiden computation.
    seed
        Leiden initialization of the optimization.
    copy
        Return a copy or write to `adata`.
    kargs
        Additional arguments passed to :func:`~leidenalg.find_partition` and the
        constructor of the `partition_type`.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields:

    **communities** - :class:`~numpy.ndarray` (:attr:`~anndata.AnnData.obs`, dtype `int`)
        integer array of community assignments for each row in data.

    **graph** - :class:`~scipy.sparse.spmatrix` (:attr:`~anndata.AnnData.obsp`, dtype `float`)
        the graph that was used for clustering.

    **Q** - `float` (:attr:`~anndata.AnnData.uns`, dtype `float`)
        the modularity score for communities on graph.

    Example
    -------
    >>> from anndata import AnnData
    >>> import scanpy as sc
    >>> import scanpy.external as sce
    >>> import numpy as np
    >>> import pandas as pd

    With annotated data as input:

    >>> adata = sc.datasets.pbmc3k()
    >>> sc.pp.normalize_per_cell(adata)

    Then do PCA:

    >>> sc.tl.pca(adata, n_comps=100)

    Compute phenograph clusters:

    **Louvain** community detection

    >>> sce.tl.phenograph(adata, clustering_algo="louvain", k=30)

    **Leiden** community detection

    >>> sce.tl.phenograph(adata, clustering_algo="leiden", k=30)

    Return only `Graph` object

    >>> sce.tl.phenograph(adata, clustering_algo=None, k=30)

    Now to show phenograph on tSNE (for example):

    Compute tSNE:

    >>> sc.tl.tsne(adata, random_state=7)

    Plot phenograph clusters on tSNE:

    >>> sc.pl.tsne(
    ...     adata, color = ["pheno_louvain", "pheno_leiden"], s = 100,
    ...     palette = sc.pl.palettes.vega_20_scanpy, legend_fontsize = 10
    ... )

    Cluster and cluster centroids for input Numpy ndarray

    >>> df = np.random.rand(1000, 40)
    >>> dframe = pd.DataFrame(df)
    >>> dframe.index, dframe.columns = (map(str, dframe.index), map(str, dframe.columns))
    >>> adata = AnnData(dframe)
    >>> sc.tl.pca(adata, n_comps=20)
    >>> sce.tl.phenograph(adata, clustering_algo="leiden", k=50)
    >>> sc.tl.tsne(adata, random_state=1)
    >>> sc.pl.tsne(
    ...     adata, color=['pheno_leiden'], s=100,
    ...     palette=sc.pl.palettes.vega_20_scanpy, legend_fontsize=10
    ... )
    zPhenoGraph clusteringr   Nz1.5.3zKplease install the latest release of phenograph:
	pip install -U PhenoGraphZX_pcaz0Please run `sc.tl.pca` on `adata` and try again!Tr   zpheno_{} zpheno_{}_igr   Zgaussianz
pheno_{}_q)datar   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   z    finished)time)logginfo
phenograph__version__AssertionErrorImportErrorAttributeError
isinstancer   ZobsmKeyErrorformatZclusterZobsppdZCategoricalZobsZuns)r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   startr0   r,   Zcomm_keyZig_keyZq_keyZcommunitiesgraphQ r<   W/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/external/tl/_phenograph.pyr0      sf     0




r0   )r   r   FFr   Tr   r   r   r   r   Nr   r   TNF)__doc__typingr   r   r   r   r   numpynpZpandasr8   Zanndatar   Zscipy.sparser   Z_compatr
   Ztools._leidenr   r+   r   r.   Zndarrayintboolfloatr0   r<   r<   r<   r=   <module>   sb                    

