U
    mdN                  	   @   s  d dl mZ d dlmZmZmZ d dlZd dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ edZdeeeeeee f  eeee f eee edddZ G dd deZ!G dd dZ"dS )    )MutableMapping)IterableUnionOptionalN)version)check_random_state)issparse)AnnData   )settings)logging)_rp_forest_generate)NeighborsView)pkg_versionz0.7rc1umappcaknnT)adata	adata_refobsembedding_methodlabeling_methodneighbors_keyinplacec                 K   s  t d}|tk r,tdt d| dt dtd}	t|trF|gn|}t|trZ|gn|}t|trn|gn|}t|dkrt|pg dkr|t| }t||}
|
	|  |D ]}|

| q|dk	r|
jf | t|D ]\}}|
|||  qtjd	|	d
 |
|S )u4      Map labels and embeddings from reference data to new data.

    :tutorial:`integrating-data-using-ingest`

    Integrates embeddings and annotations of an `adata` with a reference dataset
    `adata_ref` through projecting on a PCA (or alternate
    model) that has been fitted on the reference data. The function uses a knn
    classifier for mapping labels and the UMAP package [McInnes18]_ for mapping
    the embeddings.

    .. note::

        We refer to this *asymmetric* dataset integration as *ingesting*
        annotations from reference data to new data. This is different from
        learning a joint representation that integrates both datasets in an
        unbiased way, as CCA (e.g. in Seurat) or a conditional VAE (e.g. in
        scVI) would do.

    You need to run :func:`~scanpy.pp.neighbors` on `adata_ref` before
    passing it.

    Parameters
    ----------
    adata
        The annotated data matrix of shape `n_obs` × `n_vars`. Rows correspond
        to cells and columns to genes. This is the dataset without labels and
        embeddings.
    adata_ref
        The annotated data matrix of shape `n_obs` × `n_vars`. Rows correspond
        to cells and columns to genes.
        Variables (`n_vars` and `var_names`) of `adata_ref` should be the same
        as in `adata`.
        This is the dataset with labels and embeddings
        which need to be mapped to `adata`.
    obs
        Labels' keys in `adata_ref.obs` which need to be mapped to `adata.obs`
        (inferred for observation of `adata`).
    embedding_method
        Embeddings in `adata_ref` which need to be mapped to `adata`.
        The only supported values are 'umap' and 'pca'.
    labeling_method
        The method to map labels in `adata_ref.obs` to `adata.obs`.
        The only supported value is 'knn'.
    neighbors_key
        If not specified, ingest looks adata_ref.uns['neighbors']
        for neighbors settings and adata_ref.obsp['distances'] for
        distances (default storage places for pp.neighbors).
        If specified, ingest looks adata_ref.uns[neighbors_key] for
        neighbors settings and
        adata_ref.obsp[adata_ref.uns[neighbors_key]['distances_key']] for distances.
    inplace
        Only works if `return_joint=False`.
        Add labels and embeddings to the passed `adata` (if `True`)
        or return a copy of `adata` with mapped embeddings and labels.

    Returns
    -------
    * if `inplace=False` returns a copy of `adata`
      with mapped embeddings and labels in `obsm` and `obs` correspondingly
    * if `inplace=True` returns `None` and updates `adata.obsm` and `adata.obs`
      with mapped embeddings and labels

    Example
    -------
    Call sequence:

    >>> import scanpy as sc
    >>> sc.pp.neighbors(adata_ref)
    >>> sc.tl.umap(adata_ref)
    >>> sc.tl.ingest(adata, adata_ref, obs='cell_type')

    .. _ingest PBMC tutorial: https://scanpy-tutorials.readthedocs.io/en/latest/integrating-pbmcs-using-ingest.html
    .. _ingest Pancreas tutorial: https://scanpy-tutorials.readthedocs.io/en/latest/integrating-pancreas-using-ingest.html
    anndataz*ingest only works correctly with anndata>=z (you have z) as prior to z4, `AnnData.concatenate` did not concatenate `.obsm`.zrunning ingest   Nz    finished)time)r   ANNDATA_MIN_VERSION
ValueErrorlogginfo
isinstancestrlenIngestfitmap_embedding	neighbors	enumerate
map_labelsto_adata)r   r   r   r   r   r   r   kwargsZanndata_versionstartZingmethodicol r1   M/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/tools/_ingest.pyingest   s.    V


r3   c                   @   sF   e Zd ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	dS )_DimDictr   Nc                 C   s(   i | _ || _|| _|d k	r$| | d S N)_data_dim_axisupdate)selfdimaxisvalsr1   r1   r2   __init__   s
    z_DimDict.__init__c              
   C   sN   |j | j | jkr@td| d|j | j  d| j d| j d	|| j|< d S )NzValue passed for key 'z)' is of incorrect shape. Value has shape z for dimension z while it should have .)shaper8   r7   r   r6   )r:   keyvaluer1   r1   r2   __setitem__   s
    (z_DimDict.__setitem__c                 C   s
   | j | S r5   r6   r:   rA   r1   r1   r2   __getitem__   s    z_DimDict.__getitem__c                 C   s   | j |= d S r5   rD   rE   r1   r1   r2   __delitem__   s    z_DimDict.__delitem__c                 C   s
   t | jS r5   )iterr6   r:   r1   r1   r2   __iter__   s    z_DimDict.__iter__c                 C   s
   t | jS r5   )r$   r6   rI   r1   r1   r2   __len__   s    z_DimDict.__len__c                 C   s   t | j d| j dS )N())type__name__r6   rI   r1   r1   r2   __repr__   s    z_DimDict.__repr__)r   N)
rO   
__module____qualname__r>   rC   rF   rG   rJ   rK   rP   r1   r1   r1   r2   r4      s   

r4   c                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zd)ddZ	d*ddZ
dd Zdd Zd+ddZdd Zdd Zdd Zd d! Zd,d#d$Zd-d'd(ZdS ).r%   uX      Class to map labels and embeddings from existing data to new data.

    You need to run :func:`~scanpy.pp.neighbors` on `adata` before
    initializing Ingest with it.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix of shape `n_obs` × `n_vars`
        with embeddings and labels.
    c                 C   sV  dd l }| jsd|j_|j| j|jd d ddd| _| jj	| j_
| j| j_d | j_| j  |jd | j_t| j| j_| jjd dk | j_| j| j_| j| j_| j| j_| js| jd k	s| jd k	r| j| j_| j| j_| j| j_| jd k	r| j| j_| j| j_| j| j_n
| j| j_ |jd d d	 | j_!|jd d d
 | j_"d | j_#d S )Nr   Fr   paramsrandom_state)metricrT   X_umapi   ab)$r   _use_pynndescentZumap_Z_HAVE_PYNNDESCENTZUMAP_metricunsget_umapZlearning_rateZ_initial_alpha_rep	_raw_dataZ	knn_distsZ_validate_parametersobsmZ
embedding_r   Z_sparse_datar@   Z_small_data_metric_kwds_n_neighborsn_neighbors_random_init
_tree_init_search
_dist_funcZ_input_distance_func
_rp_forest_search_graph_nnd_idxZ_knn_search_indexZ_a_bZ_input_hash)r:   r   ur1   r1   r2   
_init_umap   s<    











zIngest._init_umapc                    s   ddl m} ddlm} ddlm} d | _d | _d | _d | _	d | _
|| j tdtdk rddlm}m} | \| _| _||| j| jd}| }nHdd	lm}	 dd
lm}
 |	 fdd}|||d}||
|d}|| _
|| _|| _	d S )Nr   )partial)initialise_search)named_distances
umap-learnz0.4.0)make_initialisationsmake_initialized_nnd_search)Zinit_from_randomZinit_from_tree)njit)initialized_nnd_searchc                    s   | |f  S r5   r1   )xy	dist_argsZ	dist_funcr1   r2   partial_dist_func  s    z3Ingest._init_dist_search.<locals>.partial_dist_func)dist)	functoolsrn   Zumap.nndescentro   Zumap.distancesrp   rd   re   _initialise_searchrf   rg   rZ   r   r   parserr   rs   Znumbart   ru   )r:   ry   rn   ro   rp   rr   rs   r}   rf   rt   ru   rz   r1   rx   r2   _init_dist_search   s<    
 zIngest._init_dist_searchc              	   C   s   ddl m} d| _t|jd d d d f }t|t| j	f}|| j
| j| j| j|| jd| _ddlm} t| jj}|| jj| jj| jj| jj| jj|| jj| jj| j_d S )Nr   )	NNDescentT)datarU   metric_kwdsrc   Z
init_graphrT   )make_forest)Zpynndescentr   rY   npZaranger@   ZhstackstackZtolilrowsr^   rZ   ra   rb   _neigh_random_staterj   Zpynndescent.rp_treesr   r   rT   r_   rc   Zn_search_treesZ	leaf_size	rng_stateZn_jobsZ_angular_treesrh   )r:   	distancesr   Z	first_colZinit_indicesr   Zcurrent_random_stater1   r1   r2   _init_pynndescent  s0    
zIngest._init_pynndescentc                 C   s  t ||}|d d | _d|d krR|d d | _| jdkrB|jn
|j| j | _nd|d krd| _|d d | _|jd d d d | jf | _nL|jtj	krd|j
 krd| _|jd d d d tj	f | _| jjd | _d|d kr
|d d | _t| j }n
i | _d	}|d d
 | _tdtdk r| | |d  }|jdktj|_|| | _d|krt|d | _nd | _n |d dd| _|  |d  d S )NrS   rc   Zuse_repXn_pcsX_pcar   r   r1   rU   rq   z0.5.0r   r   Z	rp_forestrT   )!r   rb   _use_repr   r`   r^   _n_pcsZn_varsr   ZN_PCSkeysr@   ra   tuplevaluesrZ   r   r   r~   r   copyr   astyper   Zint8maximumZ	transposeri   r   rh   r\   r   r   )r:   r   r   r(   ry   Zsearch_graphr1   r1   r2   _init_neighbors9  s:    
  

zIngest._init_neighborsc                 C   sr   |j d d d | _|j d d d | _| jrDd|j krDtd| jrb|jd |jd  | _n|jd | _d S )Nr   rS   Zzero_centerZuse_highly_variablehighly_variablez*Did not find adata.var['highly_variable'].PCs)r[   _pca_centered_pca_use_hvgvarr   r   varm
_pca_basisr:   r   r1   r1   r2   	_init_pcab  s    zIngest._init_pcaNc                 C   s   |j | _d| _d | _|| _d | _d| _d|jkr:| | |d krFd}||jkr^| 	|| nt
d| dd|jkr| | d | _d | _d | _d | _d | _d S )Nr   Fr   r(   z*There is no neighbors data in `adata.uns["z"]`.
Please run pp.neighbors.rV   )r   r^   r   r   
_adata_ref
_adata_newrY   r[   r   r   r   r`   rm   _obsm_obsZ_labels_indices
_distances)r:   r   r   r1   r1   r2   r>   n  s,    





zIngest.__init__c                 C   sv   | j j}t|r| n| }| jr>|d d | jjd f }| jrT||j	dd8 }t
|| jd d d |f }|S )Nr   r   r<   )r   r   r   Ztoarrayr   r   r   r   r   Zmeanr   dotr   )r:   r   r   r   r1   r1   r2   _pca  s    zIngest._pcac                 C   sN   | j }| jd k	r| | jS | jdkr,|jS | j|j krH|j| j S |jS )Nr   )r   r   r   r   r   r`   r   r   r1   r1   r2   	_same_rep  s    

zIngest._same_repc                 C   sf   | j jj }|jj }||s,tdtj|jj	d| _
t|jdd| _|| _|  | jd< dS )a          Map `adata_new` to the same representation as `adata`.

        This function identifies the representation which was used to
        calculate neighbors in 'adata' and maps `adata_new` to
        this representation.
        Variables (`n_vars` and `var_names`) of `adata_new` should be the same
        as in `adata`.

        `adata` refers to the :class:`~anndata.AnnData` object
        that is passed during the initialization of an Ingest instance.
        zNVariables in the new adata are different from variables in the reference adata)indexr   r   repN)r   Z	var_namesr#   upperequalsr   pdZ	DataFramer   r   r   r4   Zn_obsr   r   r   )r:   Z	adata_newZref_var_namesZnew_var_namesr1   r1   r2   r&     s    
z
Ingest.fit   皙?r   c                 C   s   ddl m}m} t|}|||dtj}| j}| j	d }	|dkrL| j
}| jrt|| j_| j|	||\| _| _n~ddlm}
 | j| j||	t|| |d}| || jj| jj||	}|
|\}}|ddd|f |ddd|f  | _| _dS )z        Calculate neighbors of `adata_new` observations in `adata`.

        This function calculates `k` neighbors in `adata` for
        each observation of `adata_new`.
        r   )	INT32_MAX	INT32_MIN   r   N)deheap_sort)r   )Z
umap.umap_r   r   r   randintr   r   Zint64r^   r   rb   rY   rj   Zsearch_rng_statequeryr   r   Z
umap.utilsr   r}   rh   intrf   ri   Zindptrindices)r:   kZ
queue_sizeepsilonrT   r   r   r   traintestr   initresultr   distsr1   r1   r2   r(     s6    
   
     zIngest.neighborsc                 C   s   | j | jd S )Nr   )r]   Z	transformr   rI   r1   r1   r2   _umap_transform  s    zIngest._umap_transformc                 C   s<   |dkr|   | jd< n |dkr0|  | jd< ntddS )z        Map embeddings of `adata` to `adata_new`.

        This function infers embeddings, specified by `method`,
        for `adata_new` from existing embeddings in `adata`.
        `method` can be 'umap' or 'pca'.
        r   rV   r   r   z5Ingest supports only umap and pca embeddings for now.N)r   r   r   NotImplementedError)r:   r.   r1   r1   r2   r'     s    zIngest.map_embeddingc                    s8   | j j| d  fdd| jD }tj| jjdS )Ncategoryc                    s   g | ]} |   d  qS )r   )mode).0ZindsZ	cat_arrayr1   r2   
<listcomp>  s     z(Ingest._knn_classify.<locals>.<listcomp>)r   
categories)r   r   r   r   r   ZCategoricalcatr   )r:   labelsr   r1   r   r2   _knn_classify  s
    zIngest._knn_classifyc                 C   s&   |dkr|  || j|< ntddS )z        Map labels of `adata` to `adata_new`.

        This function infers `labels` for `adata_new.obs`
        from existing labels in `adata.obs`.
        `method` can be only 'knn'.
        r   z%Ingest supports knn labeling for now.N)r   r   r   )r:   r   r.   r1   r1   r2   r*     s    zIngest.map_labelsFc                 C   sJ   |r
| j n| j  }|j| j | jD ]}| j| |j|< q(|sF|S dS )aV          Returns `adata_new` with mapped embeddings and labels.

        If `inplace=False` returns a copy of `adata_new`
        with mapped embeddings and labels in `obsm` and `obs` correspondingly.
        If `inplace=True` returns nothing and updates `adata_new.obsm`
        and `adata_new.obs` with mapped embeddings and labels.
        N)r   r   r`   r9   r   r   r   )r:   r   r   rA   r1   r1   r2   r+     s    	
zIngest.to_adatabatch-c                 C   s   | j j| j|||d}| j }||j| dk j|_|j| | j	D ]2}|| j j
krHt| j j
| | j	| f|j
|< qH| jdkrt| j j
| j | j	d f|j
| j< d| j	kr| j jd |jd< d| j	kr| j jd |jd< | j jd	 |jd	< |S )
z        Returns concatenated object.

        This function returns the new :class:`~anndata.AnnData` object
        with concatenated existing embeddings and labels of 'adata'
        and inferred embeddings and labels for `adata_new`.
        )	batch_keybatch_categoriesindex_unique1)r   r   r   rV   r   r   r   r   )r   Zconcatenater   r   r   r   Z	obs_namesr   r9   r   r`   r   Zvstackr   r[   r   )r:   r   r   r   r   Z
obs_updaterA   r1   r1   r2   to_adata_joint"  s0    





zIngest.to_adata_joint)N)N)Nr   r   r   )F)r   Nr   )rO   rQ   rR   __doc__rm   r   r   r   r   r>   r   r   r&   r(   r   r'   r   r*   r+   r   r1   r1   r1   r2   r%      s(   -/ )
%



$
     r%   )Nr   r   NT)#collections.abcr   typingr   r   r   Zpandasr   numpyr   	packagingr   Zsklearn.utilsr   Zscipy.sparser   r   r	    r   r   r    r(   r   _utilsr   Z_compatr   r~   r   r#   boolr3   r4   r%   r1   r1   r1   r2   <module>   s:   
     y"