U
    vId=                     @   s  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZmZmZ d dlmZ d dlZd dlZd dlmZ d dlZd dlZd dlZd d	lmZ d
dlmZ d
dlmZ ddl m!Z! ddl"m#Z# zddl$m%Z% W n0 e&k
r Z' ze'Z(dd Z%W 5 dZ'['X Y nX d?eeee) f e	e) e	e* e)edddZ+d@eee)e,f e)edddZ-dAeedddZ.ee)eddd Z/e
e)e
e)ee) f eej0e
e)ej1f f d!d"d#Z2ed$d%d&d'd(d)dd*ddei ei d+
ee*e*e)e)e	e
e)ee) f  e)e	e
e)ee) f  e)e
e)ee) f e
e)ee) f ed,d-d.Z3dBee)ed/d0d1Z4dCeeee) f e	e) e	e* e)edd2d3Z5ee) ee)ddf d4d5d6Z6ee) e	e) e	e* e)ed7d8d9Z7dDd;d<Z8d=d> Z9dS )E    )Path)PathLikefspath)MappingProxyType)UnionOptionalMappingTuple)IterableIterator	Generator)OrderedDictN)warn)sparse   )AnnData)_deprecate_positional_args   )is_float)	read_h5ad)	read_zarrc                  O   s   t d S )N)e)___ r   I/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/anndata/_io/read.pyr      s    r   ,float32)filename	delimiterfirst_column_namesdtypereturnc                 C   s   t | |||S )a      Read `.csv` file.

    Same as :func:`~anndata.read_text` but with default delimiter `','`.

    Parameters
    ----------
    filename
        Data file.
    delimiter
        Delimiter that separates data within text file.
        If `None`, will split at arbitrary number of white spaces,
        which is different from enforcing splitting at single white space `' '`.
    first_column_names
        Assume the first column stores row names.
    dtype
        Numpy data type.
    )	read_text)r   r   r    r!   r   r   r   read_csv   s    r$   )r   sheetr!   r"   c                 C   sz   ddl m} |t| |}|jddddf }t|jdddf jtd}ttj	|j
dd tdd}t|||S )z    Read `.xlsx` (Excel) file.

    Assumes that the first columns stores the row names and the first row the
    column names.

    Parameters
    ----------
    filename
        File name to read from.
    sheet
        Name of sheet in Excel file.
    r   )
read_excelNr   )	row_namesr!   )	col_names)pandasr&   r   valuesdictZilocastypestrnparraycolumnsr   )r   r%   r!   r&   ZdfXrowcolr   r   r   r&   9   s     r&   )r   r"   c                 C   s   t j| dddd}tj|d |d jj|d jjff|d}t jt j|d jjddd}t jt j|d jjddd}t	|||d	S )
z    Read a gzipped condensed count matrix from umi_tools.

    Parameters
    ----------
    filename
        File name to read from.
    category)genecellr(   countr7   r6   )name)index)r2   obsvar)
pdZ
read_tabler   
csr_matrixcatcodes	DataFrameIndex
categoriesr   )r   r!   tabler2   r;   r<   r   r   r   read_umi_toolsS   s    rE   )r   keyr"   c           	   	   C   s   t | d|}dd | D }|dkr>td|  d| d|| d }i i g}td	d
gD ]$\}}||kr^|| d || |< q^W 5 Q R X t||d |d }|S )z    Read `.h5` (hdf5) file.

    Note: Also looks for fields `row_names` and `col_names`.

    Parameters
    ----------
    filename
        Filename of data file.
    key
        Name of dataset in the file.
    rc                 S   s   g | ]}|qS r   r   ).0kr   r   r   
<listcomp>z   s     zread_hdf.<locals>.<listcomp> z	The file z stores the following sheets:
z&
Call read/read_hdf5 with one of them.r   r'   r)   r   r   )h5pyFilekeys
ValueError	enumerater   )	r   rF   frN   r2   Z	rows_colsinamer9   adatar   r   r   read_hdfj   s     rT   )inputidx_namedimm_mappingr"   c           	         s   t  }i }| D ]&\}}t fdd|D j||< q  D ]2\}}|jdkrn|jd dkrn|||< qD|||< qD||kr|j|ddd ||fS )Nc                    s   g | ]}  |qS r   )pop)rH   r9   rU   r   r   rJ      s     z(_fmt_loom_axis_attrs.<locals>.<listcomp>r   T)ZdropZinplace)	r=   rA   itemsr/   r0   TndimshapeZ	set_index)	rU   rV   rW   Zaxis_dfZaxis_mappingrF   namesrI   vr   rY   r   _fmt_loom_axis_attrs   s     

r`   z0.9)versionTFZsplicedZCellIDZGene)
r   cleanupX_name	obs_names
obsm_names	var_names
varm_namesr!   obsm_mappingvarm_mapping)r   r   rb   rc   rd   re   rf   rg   r!   rh   ri   r"   c       
      
   K   sP  |dk	r&t dt |	i kr"td|}	|dk	rLt dt |
i krHtd|}
t| } ddlm} || df|}||j krd	}|r|j|  j	
 n|j| d
 j	}|j|dd}t }|d	kr|r|jd	  j	
 n|jd	 d
 j	|d< |j D ]:}|d	kr|r$|j|  j	
 n|j| d
 j	||< qtt|j||	\}}tt|j||
\}}i }|ri }t| D ]4}tt|| dkr|| d || < ||= q|r||d< i }t| D ]4}tt|| dkr|| d || < ||= q|r||d< t|||||r,|nd|r8|nd|d}W 5 Q R X |S )a      Read `.loom`-formatted hdf5 file.

    This reads the whole file into memory.

    Beware that you have to explicitly state when you want to read the file as
    sparse data.

    Parameters
    ----------
    filename
        The filename.
    sparse
        Whether to read the data matrix as sparse.
    cleanup
        Whether to collapse all obs/var fields that only store
        one unique value into `.uns['loom-.']`.
    X_name
        Loompy key with which the data matrix :attr:`~anndata.AnnData.X` is initialized.
    obs_names
        Loompy key where the observation/cell names are stored.
    obsm_mapping
        Loompy keys which will be constructed into observation matrices
    var_names
        Loompy key where the variable/gene names are stored.
    varm_mapping
        Loompy keys which will be constructed into variable matrices
    **kwargs:
        Arguments to loompy.connect

    Example
    -------

    .. code:: python

        pbmc = anndata.read_loom(
            "pbmc.loom",
            sparse=True,
            X_name="lognorm",
            obs_names="cell_names",
            var_names="gene_names",
            obsm_mapping={
                "X_umap": ["umap_1", "umap_2"]
            }
        )
    NzbArgument obsm_names has been deprecated in favour of `obsm_mapping`. In 0.9 this will be an error.zfReceived values for both `obsm_names` and `obsm_mapping`. This is ambiguous, only pass `obsm_mapping`.zbArgument varm_names has been deprecated in favour of `varm_mapping`. In 0.9 this will be an error.zfReceived values for both `varm_names` and `varm_mapping`. This is ambiguous, only pass `varm_mapping`.r   )connectrG   rK   r   F)copymatrixr   zloom-obszloom-var)r;   r<   layersobsmvarmuns)r   FutureWarningrO   r   Zloompyrj   rm   rN   r   r[   Ztocsrr-   r   r`   r,   Z	col_attrsZ	row_attrslistlensetr   )r   r   rb   rc   rd   re   rf   rg   r!   rh   ri   kwargsrj   lcr2   rm   rF   r;   rn   r<   ro   rp   Zuns_obsZuns_varrS   r   r   r   	read_loom   s    ?(&

	rw   )r   r!   r"   c                 C   s:   ddl m} |t| |}ddlm} ||}t|S )z    Read `.mtx` file.

    Parameters
    ----------
    filename
        The filename.
    dtype
        Numpy data type.
    r   )mmreadr>   )Zscipy.iorx   r   r-   Zscipy.sparser>   r   )r   r!   rx   r2   r>   r   r   r   read_mtx.  s
    rz   c              
   C   s   t | tttfst| |||S t| } | jdkrftjt| dd}t||||W  5 Q R  S Q R X nj| jdkrt	jt| dd}t||||W  5 Q R  S Q R X n*|  }t||||W  5 Q R  S Q R X dS )a      Read `.txt`, `.tab`, `.data` (text) file.

    Same as :func:`~anndata.read_csv` but with default delimiter `None`.

    Parameters
    ----------
    filename
        Data file, filename or stream.
    delimiter
        Delimiter that separates data within text file. If `None`, will split at
        arbitrary number of white spaces, which is different from enforcing
        splitting at single white space `' '`.
    first_column_names
        Assume the first column stores row names.
    dtype
        Numpy data type.
    z.gzrt)modez.bz2N)

isinstancer   r.   bytes
_read_textr   suffixgzipopenbz2)r   r   r    r!   rQ   r   r   r   r#   C  s    
"
"
r#   )	file_liker"   c                 c   s"   | D ]}| d}|r|V  qdS )z<Helper for iterating only nonempty lines without line breaksz
N)rstrip)r   liner   r   r   
iter_linesj  s    
r   )rQ   r   r    r!   r"   c                 C   s`  g }g }t | }g }g }|D ]}	|	drD|	d}
|
r||
 q|d k	rd||	krdtd|d|	|}t|d s|}nTt|d r|rd}||d  |tj|dd  |d	 n|tj||d	  qq|st	|dkrt|d  }nt
t	|d t}tj|td	}|d kr6d
}|D ]n}	|	|}|s\t|d sd}||d  |tj|dd  |d	 n|tj||d	  qq:t	|dkr|d j|d jkrd}t|d tt}||d d tt |d dd  g}|D ]V}	|	|}|r`||d  |tj|dd  |d	 n|tj||d	 q |d j|d jkrtd|d j d|d j dtj||d	}|st
t	|t}n,t|}t|D ]\}}|d||< q|j|jd kr$|dd  }t|D ]\}}|d||< q,t|t|dt|ddS )N#z# zDid not find delimiter z in first line.r   Tr   r(   FzLength of first line (z)) is different from length of last line (z).")rd   )rf   )r;   r<   )r   
startswithlstripappendrO   splitr   r/   r0   rs   Zaranger-   r.   sizeintrP   stripr]   r   r,   )rQ   r   r    r!   commentsdatalinesr)   r'   r   commentZ	line_listrR   r9   r   r   r   r   r  s    





$

r   r2   c                 C   sb   ddl m} | d}|| | d | | d | | d f| | d d| |< t| | | S )	Nr   ry   Z_csr_data_indices_indptr_shape)r]   )Zscipy.sparse.csrr>   del_sparse_matrix_keys)drF   r>   key_csrr   r   r   load_sparse_csr  s    
&

r   c                 C   s4   | | d= | | d= | | d= | | d= d S )Nr   r   r   r   r   )mappingr   r   r   r   r     s    r   )r   Nr   )r   )N)r   )NNr   )r2   ):pathlibr   osr   r   typesr   typingr   r   r   r	   r
   r   r   collectionsr   r   r   warningsr   rL   numpyr/   r*   r=   Zscipyr   rK   r   compatr   utilsr   Zh5adr   Zzarrr   ImportError_er   r.   boolr$   r   r&   rE   rT   rA   Zndarrayr`   rw   rz   r#   r   r   r   r   r   r   r   r   <module>   s        
 "      '	k
