U
    md                     @   s   d Z ddlmZmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZmZmZ eeed
de	eee ee eee  ee eeeee eeeeef  dddZdS )zA
Computes a dendrogram based on a given categorical observation.
    )OptionalSequenceDictAnyN)AnnData)is_categorical_dtype   )logging)_doc_params)_choose_representationdoc_use_rep	doc_n_pcs)n_pcsuse_reppearsoncompleteFT)adatagroupbyr   r   	var_namesuse_raw
cor_methodlinkage_methodoptimal_ordering	key_addedinplacereturnc                 C   s  t |tr|g}|D ]R}||  kr<td| d|   t| j| std| d| j| j q|dkrtt	| ||d}| j|d  }t
|dkr|dd D ](}|td	 | j| t d
}qd	||_|j|dd |jj}n2|r| jjn| j}ddlm} || |||\}}|jdd }ddlm  m} ddlm} |jj|d}|d| }|j|||d}|j |t!|dd}t"||||||d |d ||j#d	}|
r|	dkrdd	| }	t$%d|	d || j&|	< n|S dS )a	      Computes a hierarchical clustering for the given `groupby` categories.

    By default, the PCA representation is used unless `.X`
    has less than 50 variables.

    Alternatively, a list of `var_names` (e.g. genes) can be given.

    Average values of either `var_names` or components are used
    to compute a correlation matrix.

    The hierarchical clustering can be visualized using
    :func:`scanpy.pl.dendrogram` or multiple other visualizations that can
    include a dendrogram: :func:`~scanpy.pl.matrixplot`,
    :func:`~scanpy.pl.heatmap`, :func:`~scanpy.pl.dotplot`,
    and :func:`~scanpy.pl.stacked_violin`.

    .. note::
        The computation of the hierarchical clustering is based on predefined
        groups and not per cell. The correlation matrix is computed using by
        default pearson but other methods are available.

    Parameters
    ----------
    adata
        Annotated data matrix
    {n_pcs}
    {use_rep}
    var_names
        List of var_names to use for computing the hierarchical clustering.
        If `var_names` is given, then `use_rep` and `n_pcs` is ignored.
    use_raw
        Only when `var_names` is not None.
        Use `raw` attribute of `adata` if present.
    cor_method
        correlation method to use.
        Options are 'pearson', 'kendall', and 'spearman'
    linkage_method
        linkage method to use. See :func:`scipy.cluster.hierarchy.linkage`
        for more information.
    optimal_ordering
        Same as the optimal_ordering argument of :func:`scipy.cluster.hierarchy.linkage`
        which reorders the linkage matrix so that the distance between successive
        leaves is minimal.
    key_added
        By default, the dendrogram information is added to
        `.uns[f'dendrogram_{{groupby}}']`.
        Notice that the `groupby` information is added to the dendrogram.
    inplace
        If `True`, adds dendrogram information to `adata.uns[key_added]`,
        else this function returns the information.

    Returns
    -------
    If `inplace=False`, returns dendrogram information,
    else `adata.uns[key_added]` is updated with it.

    Examples
    --------
    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> sc.tl.dendrogram(adata, groupby='bulk_labels')
    >>> sc.pl.dendrogram(adata)
    >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ']
    >>> sc.pl.dotplot(adata, markers, groupby='bulk_labels', dendrogram=True)
    z4groupby has to be a valid observation. Given value: z, valid observations: z:groupby has to be a categorical observation. Given value: z, Column type: N)r   r   r      _categoryT)r   r   )_prepare_dataframe)level)distance)method)r"   r   )labelsZno_plotZivlleaves)	linkager   r   r   r   Zcategories_orderedZcategories_idx_orderedZdendrogram_infoZcorrelation_matrixZdendrogram_z$Storing dendrogram info using `.uns[z]`)'
isinstancestrZobs_keys
ValueErrorr   ZobsZdtypepdZ	DataFramer   lenZastypejoinnameZ	set_indexindex
categoriesrawr   Zplotting._anndatar   r   ZmeanZscipy.cluster.hierarchyZclusterZ	hierarchyZscipy.spatialr!   TZcorrZ
squareformr%   
dendrogramlistdictvalueslogginfoZuns)r   r   r   r   r   r   r   r   r   r   r   groupZrep_dfZcategoricalr.   Z
gene_namesr   Zmean_dfZschr!   Zcorr_matrixZcorr_condensedZz_varZdendro_infoZdat r8   Q/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/tools/_dendrogram.pyr1      sp    P

  
r1   )	NNNNr   r   FNT)__doc__typingr   r   r   r   Zpandasr)   Zanndatar   Zpandas.api.typesr    r	   r5   _utilsr
   Ztools._utilsr   r   r   r'   intboolr1   r8   r8   r8   r9   <module>   s>   
         
