U
    mdK&                     @   s  d dl mZ d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
Z
d dlZd dlZd dlmZ edddddddeeeejejf  ee ee ee ee eeejef d	d
dZe
jddddd Ze
jddddd Ze
jdddd Ze
jdddd Ze
jddddd Ze
jddddd Zedd Zeejeej dd Z!eejdd Z!eej"eej#d d Z!d!d" Z$eej ejd#d$d%Z%dS )&    )singledispatch)OptionalUnionN)AnnData)_get_obs_rep)sparseF)vals	use_graphlayerobsmobspuse_raw)adatar   r	   r
   r   r   r   returnc                C   sx   |dkrLt | dr(d| jkr(| jd }qRd| jkrB| jd d }qRtdnt |dkrnt| ||||dj}t||S )a=  
    Calculate `Geary's C <https://en.wikipedia.org/wiki/Geary's_C>`_, as used
    by `VISION <https://doi.org/10.1038/s41467-019-12235-0>`_.

    Geary's C is a measure of autocorrelation for some measure on a graph. This
    can be to whether measures are correlated between neighboring cells. Lower
    values indicate greater correlation.

    .. math::

        C =
        \frac{
            (N - 1)\sum_{i,j} w_{i,j} (x_i - x_j)^2
        }{
            2W \sum_i (x_i - \bar{x})^2
        }

    Params
    ------
    adata
    vals
        Values to calculate Geary's C for. If this is two dimensional, should
        be of shape `(n_features, n_cells)`. Otherwise should be of shape
        `(n_cells,)`. This matrix can be selected from elements of the anndata
        object by using key word arguments: `layer`, `obsm`, `obsp`, or
        `use_raw`.
    use_graph
        Key to use for graph in anndata object. If not provided, default
        neighbors connectivities will be used instead.
    layer
        Key for `adata.layers` to choose `vals`.
    obsm
        Key for `adata.obsm` to choose `vals`.
    obsp
        Key for `adata.obsp` to choose `vals`.
    use_raw
        Whether to use `adata.raw.X` for `vals`.


    This function can also be called on the graph and values directly. In this case
    the signature looks like:

    Params
    ------
    g
        The graph
    vals
        The values


    See the examples for more info.

    Returns
    -------
    If vals is two dimensional, returns a 1 dimensional ndarray array. Returns
    a scalar if `vals` is 1d.


    Examples
    --------

    Calculate Gearys C for each components of a dimensionality reduction:

    .. code:: python

        import scanpy as sc, numpy as np

        pbmc = sc.datasets.pbmc68k_processed()
        pc_c = sc.metrics.gearys_c(pbmc, obsm="X_pca")


    It's equivalent to call the function directly on the underlying arrays:

    .. code:: python

        alt = sc.metrics.gearys_c(pbmc.obsp["connectivities"], pbmc.obsm["X_pca"].T)
        np.testing.assert_array_equal(pc_c, alt)
    Nr   ZconnectivitiesZ	neighborszMust run neighbors first.)r   r
   r   r   )hasattrr   Zuns
ValueErrorNotImplementedErrorr   Tgearys_c)r   r   r	   r
   r   r   r   g r   Q/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/metrics/_gearys_c.pyr      s    Y

r   T)cacheparallelc                 C   s   |   }t| ||||S N)sum_gearys_c_vec_W)dataindicesindptrxWr   r   r   _gearys_c_vec   s    r"   c                 C   s   t |d }|tj}| }d}t|D ]L}t|| ||d  }	||	 }
| |	 }|t||| ||
  d  7 }q.|d | }d| || d   }|| }|S )N              )	lenastypenpfloat_meannumbaprangeslicer   )r   r   r   r    r!   Nx_bartotalis	i_indicesi_datanumerdenomCr   r   r   r      s    $r   )r   c                 C   s   |  }d}t|}t|D ]L}t|| ||d  }	||	 }
| |	 }|t||| ||
  d  7 }q|d | }d| || d   }|| }|S )Nr$   r#   r%   )r*   r&   r+   r,   r-   r(   r   )g_data	g_indicesg_indptrr    r!   r/   r0   r.   r1   r2   r3   r4   r5   r6   r7   r   r   r   !_gearys_c_inner_sparse_x_densevec   s    $r;   c                 C   s   t j|t jd}|||< t || }d}	t|}t|D ]L}
t||
 ||
d  }|| }| | }|	t |||
 ||  d  7 }	q<|d |	 }d| t |d t || d  |d |   }|| }|S )NZdtyper$   r#   r%   )r(   zerosr)   r   r&   r+   r,   r-   )r8   r9   r:   x_data	x_indicesr.   r!   r    r/   r0   r1   r2   r3   r4   r5   r6   r7   r   r   r   "_gearys_c_inner_sparse_x_sparsevec   s*    $
	r@   c           
      C   sv   |j \}}|t|d kst|  }tj|tjd}t|D ]0}||d d f 	tj}	t
| |||	|||< q@|S )Nr#   r<   )shaper&   AssertionErrorr   r(   r=   r)   r+   r,   r'   r;   )
r8   r9   r:   XMr.   r!   outkr    r   r   r   _gearys_c_mtx   s    
rG   c              	   C   s|   |\}}|   }	tj|tjd}
t||dd }t||dd }t|D ]$}t| |||| || ||	|
|< qR|
S )Nr<   r#   )r   r(   r=   r)   splitr+   r,   r@   )r8   r9   r:   r>   r?   Zx_indptrZx_shaperD   r.   r!   rE   Zx_data_listZx_indices_listrF   r   r   r   _gearys_c_mtx_csr   s     
	rJ   c                 C   s
   t | S r   )r(   Zasarrayvalr   r   r   _resolve_vals   s    rM   c                 C   s   | S r   r   rK   r   r   r   _  s    rN   c                 C   s
   t | S r   )r   
csr_matrixrK   r   r   r   rN   	  s    c                 C   s   |   S r   )Zto_numpyrK   r   r   r   rN     s    c                 C   s|   ddl m} tj| jd tjd}|tj || dd }| rNt	d}n t
tt||   d | | ||fS )z    Checks that values wont cause issues in computation.

    Returns new set of vals, and indexer to put values back into result.

    For details on why this is neccesary, see:
    https://github.com/scverse/scanpy/issues/1806
    r   )is_constantr<   r#   )ZaxisNz4 variables were constant, will return nan for these.)Zscanpy._utilsrP   r(   emptyrA   Zfloat64fillnanallr-   warningswarnUserWarningr&   r   )r   rP   full_resultidxerr   r   r   _check_vals  s    	
rZ   )r   c                 C   sJ  | j d | j d kstdt|}| jjtjdd}t|tj	r| j d |j d ksZtt
|\}}}t|| j| j|jjtjdd|j|j|j }|||< |S t|tjr|jdkr| j d |j d kstt|| j| j|S t|tjr@|jdkr@| j d |j d kstt
|\}}}t|| j| j|}|||< |S t d S )Nr   r#   z'`g` should be a square adjacency matrixF)copyr%   )rA   rB   rM   r   r'   r(   r)   
isinstancer   rO   rZ   rJ   r   r   ndarrayndimr"   rG   r   )r   r   r8   new_valsrY   rX   resultr   r   r   	_gearys_c-  s6    	ra   )&	functoolsr   typingr   r   rU   Zanndatar   Z
scanpy.getr   r+   numpyr(   ZpandaspdZscipyr   r]   Zspmatrixstrboolfloatr   Znjitr"   r   r;   r@   rG   rJ   rM   registerrO   rN   Z	DataFrameZSeriesrZ   ra   r   r   r   r   <module>   sd   v















