U
    md34                     @   s  d Z ddlmZmZmZmZmZ ddlZddl	Z
ddlmZ ddlmZ ddlmZ dddddd	eeeee f eee ee ee ee e
jd
ddZd$e
je
jed ee ee
j eeee ee ee f dddZe
jee ed edddZd%ddddeee eeeef  eeee
jdddZd&ddeee eeeef  ee
jdddZdddddd d!Zdddddd"d#ZdS )'z9This module contains helper functions for accessing data.    )OptionalIterableTupleUnionListN)spmatrix)AnnData   )LiteralZrank_genes_groups)keypval_cutoff
log2fc_min
log2fc_maxgene_symbols)adatagroupr   r   r   r   r   returnc                   s  t trgdkr.t j d jjdddddg} fdd|D }tj|d	dd
g|d}|jd	d	 }tj
|d
 d|d
< |d
dgjdd}|dk	r||d |k  }|dk	r||d |k }|dk	r||d |k  }|dk	r
|j j| dd}ddd D ]N\}	}
|	 j kr j |	  jdd	 jdd
|
d}||}qtd	kr|jd
dd |j	ddS )a      :func:`scanpy.tl.rank_genes_groups` results in the form of a
    :class:`~pandas.DataFrame`.

    Params
    ------
    adata
        Object to get results from.
    group
        Which group (as in :func:`scanpy.tl.rank_genes_groups`'s `groupby`
        argument) to return results from. Can be a list. All groups are
        returned if groups is `None`.
    key
        Key differential expression groups were stored under.
    pval_cutoff
        Return only adjusted p-values below the  cutoff.
    log2fc_min
        Minimum logfc to return.
    log2fc_max
        Maximum logfc to return.
    gene_symbols
        Column name in `.var` DataFrame that stores gene symbols. Specifying
        this will add that column to the returned dataframe.

    Example
    -------
    >>> import scanpy as sc
    >>> pbmc = sc.datasets.pbmc68k_reduced()
    >>> sc.tl.rank_genes_groups(pbmc, groupby="louvain", use_raw=True)
    >>> dedf = sc.get.rank_genes_groups_df(pbmc, group="0")
    NnamesZscoresZlogfoldchangesZpvalsZ	pvals_adjc                    s$   g | ]}t  j |  qS  )pd	DataFrameuns).0cr   r   r   r   G/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/get/get.py
<listcomp>@   s     z(rank_genes_groups_df.<locals>.<listcomp>   r   )axisr   keys)level)
categoriesZlevel_0)columns)onZpct_nz_groupZpct_nz_reference)ptsZpts_restindex)Zid_varsvar_nameZ
value_nameT)r"   Zinplace)drop)
isinstancestrlistr   Zdtyper   r   concatstackZreset_indexZCategoricalZsort_valuesr(   joinvaritemsZrename_axisZmeltmergelen)r   r   r   r   r   r   r   Zcolnamesdr$   nameZpts_dfr   r   r   rank_genes_groups_df   s@    )


  r5   Fobsr/   )dim_df	alt_indexdimr   alias_indexuse_rawr   c                 C   s  |r
d}nd}d|dk }d}|dk	rLt j||d}	|j}| d| d}
nt j||d}	| d	}
g }g }g }g }| jjs| j| j   }td
| d| |jst| d| d| d| dt	|D ]}|| jkr|
| ||	jkrtd| d| d| d|
 d	q||	jkr|	| }t|t jrj|dk	sNttd| d| d|
 d|
| |
| q|
| qt|dkrtd| d| d| d|
 d	|||fS )z8Common logic for checking indices for obs_df and var_df.z	adata.rawr   r6   r7   Nr%   z['z']Z_nameszadata.z_ contains duplicated columns. Please rename or remove these columns first.
`Duplicated columns .z5_names contains duplicated items
Please rename these z& names first for example using `adata.z_names_make_unique()`z	The key 'z' is found in both adata.z and zFound duplicate entries for 'z' in r   zCould not find keys 'z' in columns of `adata.z` or in )r   ZSeriesr4   r"   Z	is_uniqueZ
duplicatedtolist
ValueErrornpuniqueappendr&   KeyErrorr)   AssertionErrorr2   )r8   r9   r:   r   r;   r<   Zalt_reprZalt_dimZ
alias_nameZ	alt_namesZalt_search_reprZcol_keysZ
index_keysZindex_aliases	not_foundZdup_colsr   valr   r   r   _check_indices`   s\    	


rG   )r   r   )	dim_namesr   r   backedc                 C   s   t d t d g}||}|r`t|}| }|| ||< t|||< | t| t| }	n|||< | t| }	ddlm}
 |
|	r|	 }	|	S )Nr   )issparse)	sliceZget_indexerr@   Zargsortcopytuplescipy.sparserJ   toarray)XrH   r   r   rI   Zmutable_idxeridxZ	idx_orderZ	rev_idxermatrixrJ   r   r   r   _get_array_values   s    

rS   r   )layerr   r<   )r   r   	obsm_keysrT   r   r<   r   c                C   s  |r|dkst d| jj}n| j}|dk	r<t|| }nd}t| j|jd|||d\}}	}
tj| j	d}t
|	dkrtt| ||d|j|	d| jd	}tj|tj||
| j	d
gdd}t
|dkrtj|| j| gdd}|r|| }|D ]\}}| d| }| j| }t|tjr6t|dd|f ||< qt|trbt|dd|f  ||< qt|tjr|jdd|f ||< q|S )a      Return values for observations in adata.

    Params
    ------
    adata
        AnnData object to get values from.
    keys
        Keys from either `.var_names`, `.var[gene_symbols]`, or `.obs.columns`.
    obsm_keys
        Tuple of `(key from obsm, column index of obsm[key])`.
    layer
        Layer of `adata` to use as expression values.
    gene_symbols
        Column of `adata.var` to search for `keys` in.
    use_raw
        Whether to get expression values from `adata.raw`.

    Returns
    -------
    A dataframe with `adata.obs_names` as index, and values specified by `keys`
    and `obsm_keys`.

    Examples
    --------
    Getting value for plotting:

    >>> pbmc = sc.datasets.pbmc68k_reduced()
    >>> plotdf = sc.get.obs_df(
            pbmc,
            keys=["CD8B", "n_genes"],
            obsm_keys=[("X_umap", 0), ("X_umap", 1)]
        )
    >>> plotdf.plot.scatter("X_umap0", "X_umap1", c="CD8B")

    Calculating mean expression for marker genes by cluster:

    >>> pbmc = sc.datasets.pbmc68k_reduced()
    >>> marker_genes = ['CD79A', 'MS4A1', 'CD8A', 'CD8B', 'LYZ']
    >>> genedf = sc.get.obs_df(
            pbmc,
            keys=["louvain", *marker_genes]
        )
    >>> grouped = genedf.groupby("louvain")
    >>> mean, var = grouped.mean(), grouped.var()
    Nz9Cannot specify use_raw=True and a layer at the same time.r7   )r;   r<   r%   r   )rT   r<   r   r   rI   r"   r&   r   -)rD   rawr/   r   IndexrG   r7   r&   r   	obs_namesr2   rS   _get_obs_repisbackedr,   obsmr)   r@   ndarrayravelr   rO   loc)r   r   rU   rT   r   r<   r/   r;   Zobs_colsZvar_idx_keysZvar_symbolsdfrR   krQ   added_krF   r   r   r   obs_df   sZ    7


 rf   rT   )r   r   	varm_keysrT   r   c                C   sD  t | j| jd|\}}}tj| jjd}t|dkrttt| |d| j|d| j	dj
}tj|tj||| jdgdd}t|dkrtj|| j| gdd}|r|| }|D ]\}	}
|	 d	|
 }| j|	 }t|tjrt|d
d
|
f ||< qt|trt|d
d
|
f  ||< qt|tjr|jd
d
|
f ||< q|S )a      Return values for observations in adata.

    Params
    ------
    adata
        AnnData object to get values from.
    keys
        Keys from either `.obs_names`, or `.var.columns`.
    varm_keys
        Tuple of `(key from varm, column index of varm[key])`.
    layer
        Layer of `adata` to use as expression values.

    Returns
    -------
    A dataframe with `adata.var_names` as index, and values specified by `keys`
    and `varm_keys`.
    r/   r%   r   rg   rV   rW   r   rX   rY   N)rG   r/   r\   r   r   r&   r2   rS   r]   r^   Tr,   Z	var_namesZvarmr)   r@   r`   ra   r   rO   rb   )r   r   rh   rT   Zvar_colsZobs_idx_keys_rc   rR   rd   rQ   re   rF   r   r   r   var_df?  s8    

 rk   )r<   rT   r_   obspc          
      C   s   t |tstdt| d|dk	}|dk	}|dk	}|dk	}t||||f}	|	dksZt|	dkrh| jS |rv| j| S |r| jjS |r| j	| S |r| j
| S dstddS )z3
    Choose array aligned with obs annotation.
    z!use_raw expected to be bool, was r=   NFr   r   [That was unexpected. Please report this bug at:

	 https://github.com/scverse/scanpy/issues)r)   bool	TypeErrortypesumrD   rP   layersrZ   r_   rl   )
r   r<   rT   r_   rl   is_layeris_rawis_obsmis_obspchoices_mader   r   r   r]     s*    



r]   c                C   s   |dk	}|dk	}|dk	}|dk	}	t ||||	f}
|
dks<t|
dkrL|| _nJ|r\|| j|< n:|rj|| j_n,|rz|| j|< n|	r|| j|< ndstddS )z(
    Set value for observation rep.
    NFr   r   rm   )rq   rD   rP   rr   rZ   r_   rl   )r   rF   r<   rT   r_   rl   rs   rt   ru   rv   rw   r   r   r   _set_obs_rep  s&    
rx   )NF)r   r   )r   r   )__doc__typingr   r   r   r   r   numpyr@   Zpandasr   rN   r   Zanndatar   Z_compatr
   r*   floatr   r5   r[   rn   rG   rS   intrf   rk   r]   rx   r   r   r   r   <module>   s   T  Q   t  A