U
    md                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dl	Z
d dlZd dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ e
je
jdddZedddZeedddZeejdddZeejdddZ ddee!ejdddZ"dS )     )urlopen)	HTTPError)ZipFile)BinaryION)sparse   )	_download)settings)logging   )check_datasetdir_exists)	dataframereturnc                 C   s6   |  dd }|dk|t| kB }| jd d | f S )Nc                 S   s   t |  S N)lenunique)x r   ^/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/datasets/_ebi_expression_atlas.py<lambda>       z _filter_boring.<locals>.<lambda>r   )applyr   loc)r   Zunique_valsZ	is_boringr   r   r   _filter_boring   s    r   )	accessionc              
   C   sb   d|  d}zt | W 5 Q R X W n8 tk
r\ } z|j d| d|_ W 5 d }~X Y nX d S )Nz)https://www.ebi.ac.uk/gxa/sc/experiments//z ())r   r   msg)r   base_urler   r   r   	sniff_url   s    
r    c                 C   sf   t |  d|  }| d}| d}tj|  }|jddd t|d |d  t|d |d	  d S )
Nz(https://www.ebi.ac.uk/gxa/sc/experiment/z/download?accessKey=&fileType=z"/download/zip?accessKey=&fileType=T)parentsexist_okzexperiment-designexperimental_design.tsvzquantification-rawexpression_archive.zip)r    r	   
datasetdirmkdirr   )r   r   Z
design_urlZmtx_urlexperiment_dirr   r   r   download_experiment"   s    



r(   )streamr   c           	      C   s   |   }|dr|   }qdd |d d dD \}}}ttjj}||ks\||krdtj}ntj}tj	| dd ||tj
dd}tj|d	 |d
 d
 |d d
 ff||fd}|S )N   %c                 s   s   | ]}t |V  qd S r   )int).0r   r   r   r   	<genexpr>;   s     z'read_mtx_from_stream.<locals>.<genexpr>    z\s+)r   r   r   )sepheaderZdtyper   r   r   )shape)readline
startswithsplitnpZiinfoZint32maxZint64pdread_csvZfloat32r   
csr_matrix)	r)   Zcurlinenm_Z	max_int32Zcoord_dtypedataZmtxr   r   r   read_mtx_from_stream7   s     

".r?   )archiver   c           
   	   C   s   |   }t|dksttdd |D }tdd |D }tdd |D }| |d}t|}W 5 Q R X | |d}tj|dd dd	 }W 5 Q R X | |d$}tj|dd djd d d
f }W 5 Q R X t	
|}	||	_||	_|	S )N   c                 s   s   | ]}|j d r|V  qdS )z.mtxNfilenameendswithr,   ir   r   r   r-   P   s      z/read_expression_from_archive.<locals>.<genexpr>c                 s   s   | ]}|j d r|V  qdS )z	.mtx_rowsNrB   rE   r   r   r   r-   Q   s      c                 s   s   | ]}|j d r|V  qdS )z	.mtx_colsNrB   rE   r   r   r   r-   R   s      r	)r0   r1   r   r   )infolistr   AssertionErrornextopenr?   r8   r9   ZilocanndataAnnDataZ	var_namesZ	obs_names)
r@   infoZmtx_data_infoZmtx_rows_infoZmtx_cols_infofexprvarnameZobsnameadatar   r   r   read_expression_from_archiveM   s    (
rT   F)filter_boring)r   rU   r   c             	   C   s   t j|  }||  d }z t|}|r4t|j|_|W S  tk
rL   Y nX t|  t	d|  d|
   t|d d}t|}W 5 Q R X tj|d ddd	}||j|j< |j|d
d |rt|j|_|S )a      Load a dataset from the `EBI Single Cell Expression Atlas
    <https://www.ebi.ac.uk/gxa/sc/experiments>`__

    Downloaded datasets are saved in the directory specified by
    :attr:`~scanpy._settings.ScanpyConfig.datasetdir`.

    Params
    ------
    accession
        Dataset accession. Like ``E-GEOD-98816`` or ``E-MTAB-4888``.
        This can be found in the url on the datasets page, for example
        https://www.ebi.ac.uk/gxa/sc/experiments/E-GEOD-98816/results/tsne.
    filter_boring
        Whether boring labels in `.obs` should be automatically removed, such as
        labels with a single or :attr:`~anndata.AnnData.n_obs` distinct values.

    Example
    -------
    >>> import scanpy as sc
    >>> adata = sc.datasets.ebi_expression_atlas("E-MTAB-4888")
    z.h5adzDownloaded z to r$   rG   r#   rH   r   )r0   Z	index_colgzip)compression)r	   r%   rM   readr   obsOSErrorr(   loggrO   absoluter   rT   r8   r9   columnswrite)r   rU   r'   Zdataset_pathrS   rP   rY   r   r   r   ebi_expression_atlas`   s&    

r_   )#urllib.requestr   urllib.errorr   zipfiler   typingr   rM   Zpandasr8   numpyr6   Zscipyr   Z	readwriter   Z	_settingsr	    r
   r[   _utilsr   Z	DataFramer   strr    r(   r:   r?   rN   rT   boolr_   r   r   r   r   <module>   s.    