U
    md	#                     @   s  d dl mZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ ddlmZ ddlmZ d	Zd
ZdZeeeedddddee	ee ef e
eeef  eeejdddZeeeeddddeee eeejdddZeeeeddddddeeeee eeejdddZeeeeddddddeeeeeejdd d!Zeeed"d#ei d$e	ee eeee f f eeeef ejd%d&d'Z e !ed#d(d)dddei d*eee
e ee"e
e" e
e" e
e eeef ejd+
d,d-Z#dS ).    N)singledispatch)MappingProxyType)AnyUnionOptionalIterableDictMapping)AnnData   )rank_genes_groups_df)_doc_paramsznorg
    Organism to query. Must be an organism in ensembl biomart. "hsapiens",
    "mmusculus", "drerio", etc.zhost
    A valid BioMart host URL. Alternative values include archive urls (like
    "grch37.ensembl.org") or regional mirrors (like "useast.ensembl.org").zuse_cache
    Whether pybiomart should use a cache for requests. Will create a
    `.pybiomart.sqlite` file in current directory if used.)doc_orgZdoc_hostZdoc_use_cachezwww.ensembl.orgF)filtershost	use_cache)orgattrsr   r   r   returnc          	      C   s   t |tr|g}n*t |tjr(t|}ntdt| dzddlm} W n t	k
rh   t	dY nX |||d}|j
d jd|  }|j||d	d
}|S )z    A simple interface to biomart.

    Params
    ------
    {doc_org}
    attrs
        What you want returned.
    filters
        What you want to pick out.
    {doc_host}
    {doc_use_cache}
    z'attrs must be of type list or str, was .r   )Serverz<This method requires the `pybiomart` module to be installed.)r   ZENSEMBL_MART_ENSEMBLz{}_gene_ensemblT)
attributesr   Zuse_attr_names)
isinstancestrcabcr   list	TypeErrortypeZ	pybiomartr   ImportErrorZmartsZdatasetsformatquery)	r   r   r   r   r   r   serverZdatasetres r#   P/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/queries/_queries.pysimple_query    s"    



r%   )r   r   )r   r   r   r   r   c                C   s   t | |||dS )aO      Retrieve gene annotations from ensembl biomart.

    Parameters
    ----------
    {doc_org}
    attrs
        Attributes to query biomart for.
    {doc_host}
    {doc_use_cache}

    Returns
    -------
    Dataframe containing annotations.

    Examples
    --------
    Retrieve genes coordinates and chromosomes

    >>> import scanpy as sc
    >>> annot = sc.queries.biomart_annotations(
            "hsapiens",
            ["ensembl_gene_id", "start_position", "end_position", "chromosome_name"],
        ).set_index("ensembl_gene_id")
    >>> adata.var[annot.columns] = annot
    r   r   r   r   r%   r&   r#   r#   r$   biomart_annotationsJ   s    "r(   Zexternal_gene_namer#   )	gene_attrchr_excluder   r   )r   	gene_namer)   r*   r   r   r   c                C   s0   t | dddg||i||d}||d |  S )ap      Retrieve gene coordinates for specific organism through BioMart.

    Parameters
    ----------
    {doc_org}
    gene_name
        The gene symbol (e.g. "hgnc_symbol" for human) for which to retrieve
        coordinates.
    gene_attr
        The biomart attribute the gene symbol should show up for.
    chr_exclude
        A list of chromosomes to exclude from query.
    {doc_host}
    {doc_use_cache}

    Returns
    -------
    Dataframe containing gene coordinates for the specified gene symbol.

    Examples
    --------
    >>> import scanpy as sc
    >>> sc.queries.gene_coordinates("hsapiens", "MT-TF")
    chromosome_nameZstart_positionZend_position)r   r   r   r   r   )r%   isin)r   r+   r)   r*   r   r   r"   r#   r#   r$   gene_coordinateso   s    #r.   ZMT)attrnamer   r   
chromosome)r   r/   r   r   r0   r   c                C   s   t | |gd|gi||dS )aP      Mitochondrial gene symbols for specific organism through BioMart.

    Parameters
    ----------
    {doc_org}
    attrname
        Biomart attribute field to return. Possible values include
        "external_gene_name", "ensembl_gene_id", "hgnc_symbol", "mgi_symbol",
        and "zfin_id_symbol".
    {doc_host}
    {doc_use_cache}
    chromosome
        Mitochrondrial chromosome name used in BioMart for organism.

    Returns
    -------
    Dataframe containing identifiers for mitochondrial genes.

    Examples
    --------
    >>> import scanpy as sc
    >>> mito_gene_names = sc.queries.mitochondrial_genes("hsapiens")
    >>> mito_ensembl_ids = sc.queries.mitochondrial_genes("hsapiens", attrname="ensembl_gene_id")
    >>> mito_gene_names_fly = sc.queries.mitochondrial_genes("dmelanogaster", chromosome="mitochondrion_genome")
    r,   )r   r   r   r   r'   )r   r/   r   r   r0   r#   r#   r$   mitochondrial_genes   s    #r1   )r   Zhsapiensr   gprofiler_kwargs)	containerr   r3   r   c                C   s   zddl m} W n tk
r,   tdY nX |ddd}t|}dD ]"}||dk	rFtd	| d
qF|j| fd|i|S )aa      Get enrichment for DE results.

    This is a thin convenience wrapper around the very useful gprofiler_.

    This method dispatches on the first argument, leading to the following two
    signatures::

        enrich(container, ...)
        enrich(adata: AnnData, group, key: str, ...)

    Where::

        enrich(adata, group, key, ...) = enrich(adata.uns[key]["names"][group], ...)

    .. _gprofiler: https://pypi.org/project/gprofiler-official/#description

    Parameters
    ----------
    container
        Contains list of genes you'd like to search. If container is a `dict` all
        enrichment queries are made at once.
    adata
        AnnData object whose group will be looked for.
    group
        The group whose genes should be used for enrichment.
    key
        Key in `uns` to find group under.
    {doc_org}
    gprofiler_kwargs
        Keyword arguments to pass to `GProfiler.profile`, see gprofiler_. Some
        useful options are `no_evidences=False` which reports gene intersections,
        `sources=['GO:BP']` which limits gene sets to only GO biological processes and
        `all_results=True` which returns all results including the non-significant ones.
    **kwargs
        All other keyword arguments are passed to `sc.get.rank_genes_groups_df`. E.g.
        pval_cutoff, log2fc_min.

    Returns
    -------
    Dataframe of enrichment results.

    Examples
    --------
    Using `sc.queries.enrich` on a list of genes:

    >>> import scanpy as sc
    >>> sc.queries.enrich(['KLF4', 'PAX5', 'SOX2', 'NANOG'], org="hsapiens")
    >>> sc.queries.enrich({{'set1':['KLF4', 'PAX5'], 'set2':['SOX2', 'NANOG']}}, org="hsapiens")

    Using `sc.queries.enrich` on an :class:`anndata.AnnData` object:

    >>> pbmcs = sc.datasets.pbmc68k_reduced()
    >>> sc.tl.rank_genes_groups(pbmcs, "bulk_labels")
    >>> sc.queries.enrich(pbmcs, "CD34+")
    r   )	GProfilerzEThis method requires the `gprofiler-official` module to be installed.ZscanpyT)
user_agentZreturn_dataframe)organismNz
Argument `zL` should be passed directly through `enrich`, not through `gprofiler_kwargs`r7   )	gprofilerr5   r   dictget
ValueErrorZprofile)r4   r   r3   r5   r8   kr#   r#   r$   enrich   s    @

r=   Zrank_genes_groupsg?)r   keypval_cutoff
log2fc_min
log2fc_maxgene_symbolsr3   )
adatagroupr   r>   r?   r@   rA   rB   r3   r   c             	   C   sN   t | ||||||d}	|d k	r0t|	|  }
nt|	d  }
t|
||dS )N)rD   r>   r?   r@   rA   rB   namesr2   )r   r   Zdropnar=   )rC   rD   r   r>   r?   r@   rA   rB   r3   deZ	gene_listr#   r#   r$   _enrich_anndata  s    	rG   )$collections.abcabcr   	functoolsr   typesr   typingr   r   r   r   r   r	   ZpandaspdZanndatar
   r:   r   _utilsr   Z_doc_orgZ	_doc_hostZ_doc_use_cacher   boolZ	DataFramer%   r(   r.   r1   r=   registerfloatrG   r#   r#   r#   r$   <module>   s    )$,+
O
