U
    mdYS                     @   s  d Z ddlZddlZddlmZ ddlmZmZm	Z	m
Z
 ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ d1eeeef eee eee	e df eee	e df eee ed	ddZd2ddZdd Zdd Z d3ddZ!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d4eeeef eee	e e
eef edf ee	e e
eef df ee,eeeedf ee, ed.d/d0Z-dS )5z)Exporting to formats for other software.
    N)Path)UnionOptionalIterableMapping)AnnData)is_categorical_dtype   )_get_mean_var)NeighborsViewn_countsF)	adataproject_dirembedding_methodsubplot_namecell_groupingscustom_color_trackstotal_counts_keyneighbors_key	overwritec	              	      sd  |dkrd}|| j krtd||  krzd| |  krDd| }n6|| j krnd| d | j | d d  }ntd| | j| t|dkrjn| }	|	jd	d	d
 td|	  d	}
dddddg}tfdd|D r|st	
 d d}
nt	
d d | jdk	r<| jj }t| jj}n| j }t| j}|| jkrnt| j| }n|dj}|
rt||d  t|d  t|d  d d }|D ]}||d  qW 5 Q R X td | i }i }|dkr:| jD ].}t| j| rdd | j| D ||< qnzt|trL|g}|D ]b}|| jkrtt	
d|d n<t| j| rdd | j| D ||< nt	
d|d qP|dkr| jD ]*}t| j| st| j| ||< qnvt|tr|g}|D ]^}|| jkr,t	
d |d n8t| j| sRt| j| ||< nt	
d |d! qt|jd" |d#< t ||	d$  i }t!|||}t"||}t#|	d% | i }t$||}t%|	d& | t&| |}t'|	d' |jd" | t(|	d( | tj|	d) t)|jd" d*d+ t*|	d, t)|jd"  +d"dddf  td-d-g,d" dddf  td.d/gdddf  tj|	d0 t-t)|jd" dddf fd1d+ d2|  krtj.|	d3 | jd2 |d4 d5| j kr`t| j| j d5 d6  j/j0 | j| j d5 d6  j/j1} fd7dt2t3|D }t4| ||	d8  dS )9a      Exports to a SPRING project directory [Weinreb17]_.

    Visualize annotation present in `adata`. By default, export all gene expression data
    from `adata.raw` and categorical and continuous annotations present in `adata.obs`.

    See `SPRING <https://github.com/AllonKleinLab/SPRING>`__ or [Weinreb17]_ for details.

    Parameters
    ----------
    adata
        Annotated data matrix: `adata.uns['neighbors']` needs to
        be present.
    project_dir
        Path to directory for exported SPRING files.
    embedding_method
        Name of a 2-D embedding in `adata.obsm`
    subplot_name
        Name of subplot folder to be created at `project_dir+"/"+subplot_name`
    cell_groupings
        Instead of importing all categorical annotations when `None`,
        pass a list of keys for `adata.obs`.
    custom_color_tracks
        Specify specific `adata.obs` keys for continuous coloring.
    total_counts_key
        Name of key for total transcript counts in `adata.obs`.
    overwrite
        When `True`, existing counts matrices in `project_dir` are overwritten.

    Examples
    --------
    See this `tutorial <https://github.com/scverse/scanpy_usage/tree/master/171111_SPRING_export>`__.
    N	neighborszRun `sc.pp.neighbors` first.ZX__paramsZlayoutz.Run the specified embedding method `%s` first.T)parentsexist_okzWriting subplot to zcounts_norm_sparse_genes.hdf5zcounts_norm_sparse_cells.hdf5zcounts_norm.npzztotal_counts.txtz	genes.txtc                 3   s   | ]} |   V  qd S )N)is_file).0f)r    R/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/external/exporting.py	<genexpr>p   s     z!spring_project.<locals>.<genexpr>z} is an existing SPRING folder. A new subplot will be created, but you must set `overwrite=True` to overwrite counts matrices.FzOverwriting the files in .   w
c                 S   s   g | ]}t |qS r   strr   xr   r   r   
<listcomp>   s     z"spring_project.<locals>.<listcomp>zCell grouping z is not in adata.obsc                 S   s   g | ]}t |qS r   r%   r'   r   r   r   r)      s     z is not a categorical variablezCustom color track z is not a continuous variabler   ZUniformzcolor_data_gene_sets.csvzcolor_stats.jsonzcategorical_coloring_data.jsonzgraph_data.jsonz	edges.csvzcell_filter.txtz%i)fmtzcell_filter.npyi     i8zcoordinates.txtz%i,%.6f,%.6fZX_pcazintermediates.npz)ZEpcatotal_countspagagroupsc                    s&   g | ]} |kd d f  dqS )Nr   )meanr   i)clustscoordsr   r   r)      s     zPAGA_data.json)5uns
ValueErrorZ	obsm_keysZobsmr   parentmkdirprintallloggwarningrawXtocsclistZ	var_namesobsnparraysumA1write_hdf5_geneswrite_hdf5_cellswrite_sparse_npzopenwriteZsavetxtr   
isinstancer&   zerosshape_write_color_tracks_get_color_stats_genes_get_color_stats_custom_write_color_stats_build_categ_colors_write_cell_groupings
_get_edges_write_graph_write_edgesZarangesaveminZptpZhstackZsavez_compressedcatcodes
categoriesrangelen_export_PAGA_to_SPRING)r   r   r   r   r   r   r   r   r   Zsubplot_dirZwrite_counts_matricesZbase_dir_filelistE	gene_listr,   ogZcategorical_extrasZcontinuous_extrasZobs_namecolor_statscategorical_coloring_dataedgesZuniq_clustspaga_coordsr   )r2   r3   r   r   spring_project   s    .












  
 &$rf   c                 C   sH   t | |}d|kr|d }n|d }| }dd t|j|jD }|S )NZ	distancesconnectivitiesc                 S   s   g | ]\}}||fqS r   r   r   r1   jr   r   r   r)      s     z_get_edges.<locals>.<listcomp>)r   Ztocooziprowcol)r   r   r   matrixrd   r   r   r   rS      s    

rS   c           
      C   s   |   } t|d}|d}|d}| jd |jd< | jd |jd< t|D ]P\}}| dd|f j }t	
|d }	||	 }|j||d	 |j||	d	 qP|  dS )
zMSPRING standard: filename = main_spring_dir + "counts_norm_sparse_genes.hdf5"r#   countscell_ixr   ncellsr"   ngenesNdata)r>   h5pyFilecreate_grouprL   attrs	enumerateAsqueezerA   nonzerocreate_datasetclose)
r^   r_   filenamehfcounts_groupZ	cix_groupiGra   rn   ro   r   r   r   rE      s    

rE   c                 C   s   |   } t|d}|d}|d}| jd |jd< | jd |jd< t| jd D ]T}| |ddf j }t	
|d }|| }|jt||d	 |jt||d	 qV|  dS )
zMSPRING standard: filename = main_spring_dir + "counts_norm_sparse_cells.hdf5"r#   rn   gene_ixr   rp   r"   rq   Nrr   )Ztocsrrt   ru   rv   rL   rw   r[   ry   rz   rA   r{   r|   r&   r}   )r^   r~   r   r   Z	gix_groupZiCrn   r   r   r   r   rF     s    

rF   c                 C   s   |   } tjj|| |d dS )z@SPRING standard: filename = main_spring_dir + "/counts_norm.npz")
compressedN)r>   scipysparseZsave_npz)r^   r~   r   r   r   r   rG   ,  s    rG   c                 C   sJ   dd t |D }dd |D }||d}t| dtj|ddd d S )	Nc                 S   s   g | ]}t |t |d qS ))namenumberintr0   r   r   r   r)   3  s     z _write_graph.<locals>.<listcomp>c                 S   s$   g | ]\}}t |t |d dqS )r   )sourcetargetZdistancer   rh   r   r   r   r)   4  s     )nodeslinksr#      ),z: )indent
separators)r[   rH   rI   jsondumps)r~   Zn_nodesrd   r   outr   r   r   rT   2  s    
rT   c              	   C   s>   t | d*}|D ]}|d|d |d f  qW 5 Q R X d S )Nr#   z%i;%i
r   r"   )rH   rI   )r~   rd   r   er   r   r   rU   9  s    rU   c                 C   sf   g }|   D ].\}}|d ddd |D  }||g7 }qt|dd d}t|dd| d S )	Nr   c                 S   s   g | ]}d | qS )z%.3fr   r'   r   r   r   r)   B  s     z'_write_color_tracks.<locals>.<listcomp>c                 S   s   |  dd S )Nr   r   )split)r(   r   r   r   <lambda>D      z%_write_color_tracks.<locals>.<lambda>)keyr#   r$   )itemsjoinsortedrH   rI   )Zctracksfnamer   r   Zscoreliner   r   r   rM   ?  s    rM   c                 C   s4   t tjttj| d d d td}d| S )N      Zdtypez#%02x%02x%02x)tuplerA   rB   pltcmZjetr   )fracrgbr   r   r   _frac_to_hexH  s    ,r   c              
   C   s0  t |\}}tj|jtd}t||dk ||dk< |d j}|	d j}d}d| d |jd  }	tj|jd td}
t
|jd D ]}|j|d  |j|  }||	krt|j|j| |j|d   dd|	 |  |
|< nd|
|< ttt|| || || || |
| f| || < q| S )Nr   r   gfffffX@d   g      Y@r"   )r
   rA   rK   rL   floatsqrtrW   ZtodenserD   maxr[   Zindptr
percentilers   r   map)rb   r^   r_   ZmeansZ	variancesZstdevsZminsZmaxesZpctlZpctl_nZpctlsr   Z	n_nonzeror   r   r   rN   M  s(     
&rN   c                 C   sR   |  D ]D\}}tttt|t|t|t|t	|df| |< q| S )Nc   )
r   r   r   r   rA   r/   ZstdrW   r   r   )rb   Zcustom_colorskvr   r   r   rO   e  s    ,
rO   c              	   C   s0   t | d}|tj|ddd W 5 Q R X d S Nr#   r   T)r   	sort_keysrH   rI   r   r   )r~   rb   r   r   r   r   rP   p  s    rP   c                    sB   |  D ]4\}  fddttt D }| d| |< q| S )Nc                    s*   i | ]"\}}|t t|tt  qS r   )r   r   r\   set)r   r1   llabelsr   r   
<dictcomp>w  s    z'_build_categ_colors.<locals>.<dictcomp>)label_colorsZ
label_list)r   rx   r?   r   )rc   r   r   r   r   r   r   rQ   u  s    
rQ   c              	   C   s0   t | d}|tj|ddd W 5 Q R X d S r   r   )r~   rc   r   r   r   r   rR     s    rR   c              
      s   | j d d }| j| jj}dd |D }t| j |d  }| j| jjj  fddtt|D }|d | j krt| j |d  }n*dd l	}	|	j
j| | t| j |d  }| j d d	  \}
}t| j d d	 jd
 }t|d t|krd}nt|t| d  }t|dt| k r.d}nt|t| d  }g }ttt||||||D ].\}}}}}}||t|||||d qbg }t|
||D ]>\}}}||k r||kr|t|t|t|d q|t|d}|||d}dd l}|j|t|ddd d S )Nr-   r.   c                 S   s   g | ]}t |qS r   )r?   )r   xyr   r   r   r)     s     z*_export_PAGA_to_SPRING.<locals>.<listcomp>Z_sizesc                    s(   g | ] }d d t  |kd D qS )c                 S   s   g | ]}t |qS r   r   )r   ri   r   r   r   r)     s     z5_export_PAGA_to_SPRING.<locals>.<listcomp>.<listcomp>r   )rA   r{   r0   Zclus_labelsr   r   r)     s    _colorsr   rg   r      r   )indexsizecolorZcoordinatescellsr   )r   r   weight)Zmin_edge_weightZmax_edge_weight)r   r   edge_weight_metar#   r   )r   )r4   r@   rX   rZ   r?   rY   valuesr[   r\   Zscanpy.plotting.utilsZplottingutilsZ,add_colors_for_categorical_sample_annotationr{   rA   r   rs   r   rj   appendr   r   r   r   dumprH   )r   re   ZoutpathZ	group_keynamesr3   sizesZcell_groupscolorsZscanpysourcestargetsweightsZmin_edge_weight_viewZmin_edge_weight_saver   r1   r   r   r   r   r   r   r   r   r   r   Z	PAGA_datar   r   r   r   r]     sn    

 
     r]   louvainZpercent_mitoZn_genesr   r   2   )r   data_dir	data_nameembedding_keys
annot_keyscluster_field	nb_markerskip_matrixhtml_dirportdo_debugc                 C   s   zddl m} W n  tk
r0   td  Y nX t|}||
 |j| |||||||dd	 |dk	rt|}|j||dd |	dk	r|	||	 dS )a      Export adata to a UCSC Cell Browser project directory. If `html_dir` is
    set, subsequently build the html files from the project directory into
    `html_dir`. If `port` is set, start an HTTP server in the background and
    serve `html_dir` on `port`.

    By default, export all gene expression data from `adata.raw`, the
    annotations `louvain`, `percent_mito`, `n_genes` and `n_counts` and the top
    `nb_marker` cluster markers. All existing files in data_dir are
    overwritten, except `cellbrowser.conf`.

    See `UCSC Cellbrowser <https://github.com/maximilianh/cellBrowser>`__ for
    details.

    Parameters
    ----------
    adata
        Annotated data matrix
    data_dir
        Path to directory for exported Cell Browser files.
        Usually these are the files `exprMatrix.tsv.gz`, `meta.tsv`,
        coordinate files like `tsne.coords.tsv`,
        and cluster marker gene lists like `markers.tsv`.
        A file `cellbrowser.conf` is also created with pointers to these files.
        As a result, each adata object should have its own project_dir.
    data_name
        Name of dataset in Cell Browser, a string without special characters.
        This is written to `data_dir/cellbrowser.conf`.
        Ideally this is a short unique name for the dataset,
        like `"pbmc3k"` or `"tabulamuris"`.
    embedding_keys
        2-D embeddings in `adata.obsm` to export.
        The prefix `X_` or `X_draw_graph_` is not necessary.
        Coordinates missing from `adata` are skipped.
        By default (or when specifying `'all'` or `None`), these keys are tried:
        [`"tsne"`, `"umap"`, `"pagaFa"`, `"pagaFr"`, `"pagaUmap"`, `"phate"`,
        `"fa"`, `"fr"`, `"kk"`, `"drl"`, `"rt"`, `"trimap"`].
        For these, default display labels are automatically used.
        For other values, you can specify a mapping from coordinate name to
        display label, e.g. `{"tsne": "t-SNE by Scanpy"}`.
    annot_keys
        Annotations in `adata.obsm` to export.
        Can be a mapping from annotation column name to display label.
        Specify `None` for all available columns in `.obs`.
    skip_matrix
        Do not export the matrix.
        If you had previously exported this adata into the same `data_dir`,
        then there is no need to export the whole matrix again.
        This option will make the export a lot faster,
        e.g. when only coordinates or meta data were changed.
    html_dir
        If this variable is set, the export will build html
        files from `data_dir` to `html_dir`, creating html/js/json files.
        Usually there is one global html output directory for all datasets.
        Often, `html_dir` is located under a webserver's (like Apache)
        htdocs directory or is copied to one.
        A directory `html_dir`/`project_name` will be created and
        an index.html will be created under `html_dir` for all subdirectories.
        Existing files will be overwritten.
        If do not to use html_dir,
        you can use the command line tool `cbBuild` to build the html directory.
    port
        If this variable and `html_dir` are set,
        Python's built-in web server will be spawned as a daemon in the
        background and serve the files under `html_dir`.
        To kill the process, call `cellbrowser.cellbrowser.stop()`.
    do_debug
        Activate debugging output

    Examples
    --------
    See this
    `tutorial <https://github.com/scverse/scanpy_usage/tree/master/181126_Cellbrowser_exports>`__.
    r   Nz[The package cellbrowser is not installed. Install with 'pip install cellbrowser' and retry.)ZcoordFieldsZ
metaFieldsZclusterFieldr   Z
skipMatrixdoDebug)r   )
Zcellbrowser.cellbrowsercellbrowserImportErrorr:   errorr&   setDebugZscanpyToCellbrowserbuildZserve)r   r   r   r   r   r   r   r   r   r   r   cbr   r   r   r     s2    ]
r   )NNNr   NF)N)F)Nr   r   r   FNNF).__doc__r   loggingr:   pathlibr   typingr   r   r   r   numpyrA   Zscipy.sparser   rt   Zmatplotlib.pyplotZpyplotr   Zanndatar   Zpandas.api.typesr   Zpreprocessing._utilsr
   _utilsr   r&   boolrf   rS   rE   rF   rG   rT   rU   rM   r   rN   rO   rP   rQ   rR   r]   r   r   r   r   r   r   <module>   s         
 a

	R        
