U
    md8                      @   s   d Z ddlmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ deeee  ee ee eed ed ee dd	ddZd eeee edddZd!ddZdd Zdd ZdS )"zeThis modules provides all non-visualization tools for advanced gene ranking and exploration of genes
    )Optional
CollectionN)AnnData)metrics)issparse   )logging)select_groups)Literal   Completepearson)r   GroupRest)r   ZkendallZspearman)	adata	name_listgroupbygroupn_genesdatamethodannotation_keyreturnc                 C   s  |dkrLt  }t| jd D ],\}}	||kr0 qd|| jd | |  qnt||krd|d| }| dd|f }
d}|dks|dkrt|
jr|
j }n|
j}nt| ||\}}|dkrt|
jr|
j|| ddf  }n|
j|| ddf }nX|dkrJt|
jr0|
j||  ddf  }n|
j||  ddf }n
t	
d tj||d	}|j|d
}|dkr|dkr|| jd< n|| jd| t| < n
|| j|< dS )ap      Calculate correlation matrix.

    Calculate a correlation matrix for genes strored in sample annotation
    using :func:`~scanpy.tl.rank_genes_groups`.

    Parameters
    ----------
    adata
        Annotated data matrix.
    name_list
        Takes a list of genes for which to calculate the correlation matrix
    groupby
        If no name list is passed, genes are selected from the
        results of rank_gene_groups. Then this is the key of the sample grouping to consider.
        Note that in this case also a group index has to be specified.
    group
        Group index for which the correlation matrix for top_ranked genes should be calculated.
        Currently only int is supported, will change very soon
    n_genes
        For how many genes to calculate correlation matrix? If specified, cuts the name list
        (in whatever order it is passed).
    data
        At the moment, this is only relevant for the case that name_list is drawn from rank_gene_groups results.
        If specified, collects mask for the called group and then takes only those cells specified.
        If 'Complete', calculate correlation using full data
        If 'Group', calculate correlation within the selected group.
        If 'Rest', calculate corrlation for everything except the group
    method
        Which kind of correlation coefficient to use

        pearson
            standard correlation coefficient
        kendall
            Kendall Tau correlation coefficient
        spearman
            Spearman rank correlation
    annotation_key
        Allows to define the name of the anndata entry where results are stored.
    Nrank_genes_groups_gene_namesr   allr   r   r   z>data argument should be either <Complete> or <Group> or <Rest>)columns)r   ZCorrelation_matrix)list	enumerateunsappendlenr   Xtodenser	   loggerrorpdZ	DataFrameZcorrstr)r   r   r   r   r   r   r   r   jkZadata_relevantgroupsZ
Data_arraygroups_ordergroups_masksZDF_arrayZ	cor_table r,   P/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/tools/_top_genes.pycorrelation_matrix   s>    7





r.   d   )r   r   r   r   c                 C   sh  |dkrt  }t| jd D ],\}}||kr0 qJ|| jd | |  qd}t| ||\}}	|	| }
i }i }i }i }|
}t|D ]\}}| dd|gf j}t|r| }n|}tj	||dddd\||| < ||| < ||| < t
|||  |||  ||| < q|| jd| t| < || jd| t| < || jd| t| < || jd	| t| < dS )
aT      Calculate correlation matrix.

    Calculate a correlation matrix for genes strored in sample annotation

    Parameters
    ----------
    adata
        Annotated data matrix.
    groupby
        The key of the sample grouping to consider.
    group
        Group name or index for which the correlation matrix for top ranked
        genes should be calculated.
        If no parameter is passed, ROC/AUC is calculated for all groups
    n_genes
        For how many genes to calculate ROC and AUC. If no parameter is passed,
        calculation is done for all stored top ranked genes.
    Nr   r   F)Z	pos_labelZsample_weightZdrop_intermediateZROCfprZROCtprZROCthresholdsZROC_AUC)r   r   r   r   r	   r!   r   r"   r   Z	roc_curveZaucr&   )r   r   r   r   r   r'   r(   r)   r*   r+   maskZfprZtpr
thresholdsZroc_aucZy_trueiZvecZy_scorer,   r,   r-   ROC_AUC_analysisy   sF    
    


&r3   {Gz?Gz?c                 C   s   |d kr|  }d S Nr,   )r0   Z	mask_rest	precisionZprobabilityr,   r,   r-   subsampled_estimates   s    r8   c                 C   s   d S r6   r,   )r   Zgroubyr,   r,   r-   dominated_ROC_elimination   s    r9   c                 C   s   d S r6   r,   )r   r0   r1   r,   r,   r-   _gene_preselection   s    r:   )NNNr   r   r   N)Nr/   )Nr4   r5   )__doc__typingr   r   Zpandasr%   Zanndatar   Zsklearnr   Zscipy.sparser    r   r#   _utilsr	   Z_compatr
   r&   intr.   r3   r8   r9   r:   r,   r,   r,   r-   <module>   sJ          
j  H
