U
    mdy`                     @   s   d dl mZmZmZ d dlZd dlZd dlm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ d
dhZde	ee eed ee edddZG dd dZe	ee dddZee dddZG dd deZd e	e	eee edddZdS )!    )ListOptional
NamedTupleN)AnnData)minimum_spanning_tree   )_utils)logging)	Neighbors)Literalv1.0v1.2F)r   r   )adatagroupsuse_rna_velocitymodelneighbors_keycopyc           
      C   sV  |dkrdn|}|| j kr"td|dkrHdD ]}|| jjkr.|} qHq.|dkrZtdn|| jjkrvtd|d|r|  n| } t|  t	d}t
| |||d	}	d
| j kri | j d
< |s|	  |	j| j d
 d< |	j| j d
 d< t|	j| j |d < n|	  |	j| j d
 d< || j d
 d< tj	d|d|r>dnd d |rR| S dS )a      Mapping out the coarse-grained connectivity structures of complex manifolds [Wolf19]_.

    By quantifying the connectivity of partitions (groups, clusters) of the
    single-cell graph, partition-based graph abstraction (PAGA) generates a much
    simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights
    represent confidence in the presence of connections. By tresholding this
    confidence in :func:`~scanpy.pl.paga`, a much simpler representation of the
    manifold data is obtained, which is nonetheless faithful to the topology of
    the manifold.

    The confidence should be interpreted as the ratio of the actual versus the
    expected value of connetions under the null model of randomly connecting
    partitions. We do not provide a p-value as this null model does not
    precisely capture what one would consider "connected" in real data, hence it
    strongly overestimates the expected value. See an extensive discussion of
    this in [Wolf19]_.

    .. note::
        Note that you can use the result of :func:`~scanpy.pl.paga` in
        :func:`~scanpy.tl.umap` and :func:`~scanpy.tl.draw_graph` via
        `init_pos='paga'` to get single-cell embeddings that are typically more
        faithful to the global topology.

    Parameters
    ----------
    adata
        An annotated data matrix.
    groups
        Key for categorical in `adata.obs`. You can pass your predefined groups
        by choosing any categorical annotation of observations. Default:
        The first present key of `'leiden'` or `'louvain'`.
    use_rna_velocity
        Use RNA velocity to orient edges in the abstracted graph and estimate
        transitions. Requires that `adata.uns` contains a directed single-cell
        graph with key `['velocity_graph']`. This feature might be subject
        to change in the future.
    model
        The PAGA connectivity model.
    neighbors_key
        If not specified, paga looks `.uns['neighbors']` for neighbors settings
        and `.obsp['connectivities']`, `.obsp['distances']` for connectivities and
        distances respectively (default storage places for `pp.neighbors`).
        If specified, paga looks `.uns[neighbors_key]` for neighbors settings and
        `.obsp[.uns[neighbors_key]['connectivities_key']]`,
        `.obsp[.uns[neighbors_key]['distances_key']]` for connectivities and distances
        respectively.
    copy
        Copy `adata` before computation and return a copy. Otherwise, perform
        computation inplace and return `None`.

    Returns
    -------
    **connectivities** : :class:`numpy.ndarray` (adata.uns['connectivities'])
        The full adjacency matrix of the abstracted graph, weights correspond to
        confidence in the connectivities of partitions.
    **connectivities_tree** : :class:`scipy.sparse.csr_matrix` (adata.uns['connectivities_tree'])
        The adjacency matrix of the tree-like subgraph that best explains
        the topology.

    Notes
    -----
    Together with a random walk-based distance measure
    (e.g. :func:`scanpy.tl.dpt`) this generates a partial coordinatization of
    data useful for exploring and explaining its variation.

    .. currentmodule:: scanpy

    See Also
    --------
    pl.paga
    pl.paga_path
    pl.paga_compare
    N	neighborszEYou need to run `pp.neighbors` first to compute a neighborhood graph.)ZleidenZlouvainznYou need to run `tl.leiden` or `tl.louvain` to compute community labels, or specify `groups='an_existing_key'`z`groups` key z not found in `adata.obs`.zrunning PAGA)r   r   pagaconnectivitiesconnectivities_treeZ_sizestransitions_confidencer   z    finishedzadded
zG    'paga/transitions_confidence', connectivities adjacency (adata.uns)z    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns))timedeep)uns
ValueErrorobscolumnsKeyErrorr   r   Zsanitize_anndatalogginfoPAGAcompute_connectivitiesr   r   nparraynscompute_transitionsr   )
r   r   r   r   r   r   Zcheck_neighborskstartr    r*   K/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/tools/_paga.pyr      sP    R




r   c                   @   sN   e Zd ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dS )r"   r   Nc                 C   s4   ||j jkst|| _t||d| _|| _|| _d S )N)r   )r   r   AssertionError_adatar
   
_neighbors_model_groups_key)selfr   r   r   r   r*   r*   r+   __init__   s
    zPAGA.__init__c                 C   s@   | j dkr|  S | j dkr$|  S td| j  dt dd S )Nr   r   z`model` z needs to be one of .)r/   _compute_connectivities_v1_2_compute_connectivities_v1_0r   _AVAIL_MODELS)r1   r*   r*   r+   r#      s    

zPAGA.compute_connectivitiesc                    s|  dd l }| jj }tt|j|_tj	|dd}|j
|| jj| j jjjd   }t|} fddtt|D } jdd}tj|d	d
}t||jddj }	||j }| }
| }| }t|j|j|jD ]p\}}}|	| ||  |	| ||   |d  }|dkr(|| }nd}|dkr:d}||
||f< ||||f< q|| _|| _|
| _|   | _!|" |
fS )Nr   TZdirectedZ
membershipc                    s   g | ]}  | qS r*   )ZsubgraphZecount.0ivcr*   r+   
<listcomp>   s     z5PAGA._compute_connectivities_v1_2.<locals>.<listcomp>sumZcombine_edgesweightZweight_attr   )axis)#igraphr.   Z	distancesr   r$   oneslendatar   get_igraph_from_adjacencyVertexClusteringr-   r   r0   catcodesvaluessizesr?   rangecluster_graphget_sparse_from_igraphr%   A1Ttocooziprowcolr&   Zexpected_n_edges_randomr   _get_connectivities_tree_v1_2r   tocsr)r1   rE   rF   gr&   nZes_inner_clustercginter_esesr   Zexpected_n_edgesr;   jvZexpected_random_nullscaled_valuer*   r<   r+   r4      s@     
(



z!PAGA._compute_connectivities_v1_2c                 C   s  dd l }| jj }tt|j|_t	|}|j
|| jj| j jjjd}| }|jdd}tj|ddd }| }| }| jjd }	t|j|j|jD ]F\}
}}t|	||
  ||  }|dkr|| }nd}|||
|f< q|| _|| _| || _| |fS )	Nr   r8   r?   r@   rA   rB   r   rC   )rE   r.   r   r   r$   rF   rG   rH   r   rI   rJ   r-   r   r0   rK   rL   rM   rN   rP   rQ   rT   n_neighborsrU   rV   rW   sqrtr&   _get_connectivities_tree_v1_0r   rY   )r1   rE   rF   rZ   r=   r&   r\   r]   r   Zn_neighbors_sqr;   r_   r`   Zgeom_mean_approx_knnra   r*   r*   r+   r5      s0    
 
z!PAGA._compute_connectivities_v1_0c                    s   | j  }d|j |_t|  fddt jd D }tjj| j jt	d t
|D ]*\}}t|dkrV| j ||f  ||f< qV  S )N      ?c                    s   g | ]} |   d  qS rC   nonzeror9   r   r*   r+   r>      s   z6PAGA._get_connectivities_tree_v1_2.<locals>.<listcomp>r   Zdtype)r   r   rH   r   rO   shapespsparse
lil_matrixfloat	enumeraterG   rY   )r1   Zinverse_connectivitiesconnectivities_tree_indicesr;   r   r*   ri   r+   rX      s    

 z"PAGA._get_connectivities_tree_v1_2c                    s   |  }d|j |_t|  fddt jd D }tjj|jtd t	|D ]*\}}t
|dkrR| j||f  ||f< qR  S )Nre   c                    s   g | ]} |   d  qS rf   rg   r9   ri   r*   r+   r>      s   z6PAGA._get_connectivities_tree_v1_0.<locals>.<listcomp>r   rj   )r   rH   r   rO   rk   rl   rm   rn   ro   rp   rG   r   rY   )r1   r]   Zinverse_inter_esrq   r;   r   r*   ri   r+   rd      s    
z"PAGA._get_connectivities_tree_v1_0c                 C   s  d}|| j jkrDd| j jkr<| j jd | j j|< td ntd| j j| j| j j| j jfkrtd| j j| j d| j j| j jf dd l}tj	| j j| 
dd	d
}|j|| j j| j jjjd}|jdd}tj|dd}||j }| }| }| jjt|  }t|j|j|jD ]@\}	}
}t||	 ||
  }|dk rXdn|| ||	|
f< q*|   |j| _!d S )NZvelocity_graphvelocyto_transitionszDThe key 'velocyto_transitions' has been changed to 'velocity_graph'.zsThe passed AnnData needs to have an `uns` annotation with key 'velocity_graph' - a sparse matrix from RNA velocity.z'The passed 'velocity_graph' have shape z but shoud have shape r   boolTr7   r8   r?   r@   rA   rB   )"r-   r   r    debugr   rk   Zn_obsrE   r   rI   astyperJ   r   r0   rK   rL   rM   rP   rQ   rS   r   rT   r.   rb   r$   r%   rN   rU   rV   rW   rH   rc   eliminate_zerosr   )r1   ZvkeyrE   rZ   r=   cg_fulltransitionsZtransitions_conftotal_nr;   r_   r`   	referencer*   r*   r+   r'     sD    $ 
"zPAGA.compute_transitionsc              	   C   sx  dd l }tj| jjd dd}|j|| jj| j jj	j
d}|jdd}tj| jjd ddd}|j|| jj| j jj	j
d}|jd	d}tj|d
d}| jjt|  }	| }
| }ddlm} t|jd D ]t}||  d }|D ]X}|jj||dd
 }|jj||dd
 }tt|tt|  }t|dk rd|
||f< d|
||f< d|||f< d|||f< q||d\}}|dkrtt|d |
||f< d|
||f< n&tt|d |
||f< d|
||f< t|	| |	|  }t|t| | }|dkr6||||f< d|||f< q| |||f< d|||f< qq|
   |   |
j!| _"|j!| _#d S )Nr   rr   Tr7   r8   Fr@   rs   r?   rA   rB   )ttest_1samprC   )_source_target   g        g|=)$rE   r   rI   r-   r   rJ   r   r0   rK   rL   rM   rP   ru   rQ   r.   rb   r$   r%   rN   r   scipy.statsr{   rO   rk   rh   r^   selectlistrG   log10maxrc   rv   rS   transitions_ttestr   )r1   rE   rZ   r=   rw   Zg_boolZvc_boolZcg_boolrx   ry   r   r   r{   r;   r   r_   forwardZbackwardrM   tZprobZ	geom_meandiffr*   r*   r+   compute_transitions_old=  sj    
  


zPAGA.compute_transitions_old)r   N)__name__
__module____qualname__r2   r#   r4   r5   rX   rd   r'   r   r*   r*   r*   r+   r"      s   

%6r"   )r   returnc                 C   s6   ddl }|| jd d }dd |jddD }|S )	zCompute the degree of each node in the abstracted graph.

    Parameters
    ----------
    adata
        Annotated data matrix.

    Returns
    -------
    List of degrees for each node.
    r   Nr   r   c                 S   s   g | ]\}}|qS r*   r*   )r:   _dr*   r*   r+   r>     s     z paga_degrees.<locals>.<listcomp>rA   )rA   )networkxGraphr   degree)r   nxrZ   degreesr*   r*   r+   paga_degrees  s    r   )r   c           	      C   s   ddl m} tj| | jd d d\}}g }|D ]R}| j|  }tj|ddd}|t	| t
|t	|  }||| q0|S )	zCompute the median expression entropy for each node-group.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix.

    Returns
    -------
    Entropies of median expressions for each node.
    r   )entropyr   r   )keyrC   T)rD   Zoverwrite_input)r   r   r   Zselect_groupsr   XZtodenser$   Z	nanmedianZnanminZnanmaxappend)	r   r   Zgroups_orderZgroups_masksZ	entropiesmaskZX_maskZx_medianZx_probsr*   r*   r+   paga_expression_entropies  s     
r   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )PAGAComparePathsResult
frac_stepsn_steps
frac_pathsn_pathsN)r   r   r   ro   __annotations__intr*   r*   r*   r+   r     s   
r   r   )adata1adata2adjacency_keyadjacency_key2r   c           (         s  ddl }|| jd | ||jd |dk	r4|n| }fdd D }td|  | jd d }t| j| j	|j| j	 t|j| j	| j| j	| j| j
j|j| j
jddl}d}	d}
d}d}|j|dd	D ]\}} | d  | d  }}fd
d||fD \}}fdd||fD \}}td| d| d| d| d	 z$dd |t|t|D }W n |jk
r   d}Y nX z$dd ||t|t|D }W n |jk
r   d}Y nX |dkr,|dkr,|d7 }|d7 }|	d7 }	|
d7 }
td qn&|dks@|dkrR|d7 }|	d7 }	qt|t|kr fdd|D }|}d}fdd|D }fdd|D }n>fdd|D }|}d}fdd|D }fdd|D }d}d}t|dd D ]\}}t|D ]\} }!| |k s||!ks| d t|k r||d  || d  ksXqtd| d||d   d| d| d|  d  d!}"t| |dD ]}#|||#d  krd"}"q|"rtt| d |d d}$td#|$ d$ |d7 }| d } qqqt|d }%|
|7 }
|	|%7 }	|d7 }||%kr<|d7 }fd%d|D }&fd&d|D }'td'|& d(d)d |D  d*|' d+| d,|% d- qt|	dkr|
|	 ntj|	dkr|	ntj|	dkr|| ntj|	dkr|ntjd.S )/a  Compare paths in abstracted graphs in two datasets.

    Compute the fraction of consistent paths between leafs, a measure for the
    topological similarity between graphs.

    By increasing the verbosity to level 4 and 5, the paths that do not agree
    and the paths that agree are written to the output, respectively.

    The PAGA "groups key" needs to be the same in both objects.

    Parameters
    ----------
    adata1, adata2
        Annotated data matrices to compare.
    adjacency_key
        Key for indexing the adjacency matrices in `.uns['paga']` to be used in
        adata1 and adata2.
    adjacency_key2
        If provided, used for adata2.

    Returns
    -------
    NamedTuple with attributes

    frac_steps
        fraction of consistent steps
    n_steps
        total number of steps in paths
    frac_paths
        Fraction of consistent paths
    n_paths
        Number of paths
    r   Nr   c                    s"   g | ]}  |d krt|qS rf   )r   strr:   x)g1r*   r+   r>     s      z&paga_compare_paths.<locals>.<listcomp>zleaf nodes in graph 1: r   r   )rc                    s   g | ]} t | qS r*   r   r9   orig_names1r*   r+   r>     s     c                    s   g | ]} t | qS r*   r   r9   orig_names2r*   r+   r>     s     z&compare shortest paths between leafs (z, z) in graph1 and (z) in graph2:c                 S   s   g | ]}t |qS r*   r   r   r*   r*   r+   r>     s     c                 S   s   g | ]}t |qS r*   r   r   r*   r*   r+   r>     s     rC   z,there are no connecting paths in both graphsc                    s   g | ]} | qS r*   r*   r:   l)asso_groups1r*   r+   r>     s     c                    s   g | ]} fd d|D qS )c                    s   g | ]} t | qS r*   r   r:   sr   r*   r+   r>     s     1paga_compare_paths.<locals>.<listcomp>.<listcomp>r*   r   r   r*   r+   r>     s    c                    s   g | ]} fd d|D qS )c                    s   g | ]} t | qS r*   r   r   r   r*   r+   r>     s     r   r*   r   r   r*   r+   r>     s    c                    s   g | ]} | qS r*   r*   r   )asso_groups2r*   r+   r>   "  s     c                    s   g | ]} fd d|D qS )c                    s   g | ]} t | qS r*   r   r   r   r*   r+   r>   &  s     r   r*   r   r   r*   r+   r>   %  s    c                    s   g | ]} fd d|D qS )c                    s   g | ]} t | qS r*   r   r   r   r*   r+   r>   )  s     r   r*   r   r   r*   r+   r>   (  s    zfound matching step (z -> z) at position z in pathz and position z in path_mappedTFz$    step(s) backward to position(s) z) in path_mapped are fine, too: valid stepc                    s   g | ]} t | qS r*   r   r   r   r*   r+   r>   U  s     c                    s   g | ]} t | qS r*   r   r   r   r*   r+   r>   V  s     z      path1 = z,
path_mapped = c                 S   s   g | ]}t |qS r*   )r   )r:   pr*   r*   r+   r>   X  s     z,
      path2 = z,
-> n_agreeing_steps = z / n_steps = r3   )r   r   r   r   )r   r   r   Znodesr    rt   r   Zidentify_groupsr   rM   rK   
categories	itertoolscombinationsZshortest_pathr   ZNetworkXNoPathrG   rp   rO   r   r   r$   nan)(r   r   r   r   r   g2Zleaf_nodes1Zpaga_groupsr   r   Zn_agreeing_stepsr   Zn_agreeing_pathsr   r   r2s2Zon1_g1Zon2_g1Zon1_g2Zon2_g2Zpath1Zpath2Zpath_mappedZpath_compareZpath_compare_idZpath_compare_orig_namesZpath_mapped_orig_namesZn_agreeing_steps_pathZip_progressilr   ipr   Zconsistent_historyZiipZpossZn_steps_pathZpath1_orig_namesZpath2_orig_namesr*   )r   r   r   r   r   r+   paga_compare_paths  s    '



$
$





*

,r   )NFr   NF)r   N)typingr   r   r   numpyr$   Zscipyrl   Zanndatar   Zscipy.sparse.csgraphr    r   r	   r    r   r
   Z_compatr   r6   r   rs   r   r"   r   r   ro   r   r   r   r*   r*   r*   r+   <module>   sJ          l
  