U
    md,                     @   s   d dl mZmZ d dlZd dlmZmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZ dd	lmZ d
dlmZ ddlmZ ddlmZ deeejef ee ee eeeee eeeee eeejef dddZdddZdS )    )OptionalUnionN)issparsespmatrix)LinearOperatorsvds)check_arraycheck_random_state)svd_flip)AnnData   )logging   )_get_mean_var)	AnyRandom)settingsTarpackFfloat32)datan_compszero_center
svd_solverrandom_statereturn_infouse_highly_variabledtypecopychunked
chunk_sizereturnc                 C   s   t d}|dkr |	s t d t| t}|r@|r:|  n| }nt| | jd}|dkrld|j krltd|dkrd|j krdnd	}|rt d
 |r|dd|jd f n|}|dkrt	|j
|j}tj|kr|d }ntj}t d|  t|}|j}|	r|r |s |dkr*t d ddlm} t|jd |f|j}||d}||
D ],\}}}t|r~| n|}|| qb||
D ]4\}}}t|r| n|}|||||< qn$t|r|dkr:|r:ddlm} t|r |dkr t d | }||||d}||}nt|r|rddlm} |dkrdd}|dkrvtdt||||d}|d }|||d}|d |_|d |_ |d |_!n>|sddlm"} t d  ||||d!}||}nt#d"|jj$t|j$kr|%|}|r||j&d< i |j'd#< ||d$|j'd# d%< |r~tj|j
|fd&|j(d'< |jj)|j(d' |jd < n|jj)|j(d'< |j |j'd# d< |j!|j'd# d< t jd(|d) t d* |r|S dS t jd(|d) |r||j|j!|j fS |S dS )+u|      Principal component analysis [Pedregosa11]_.

    Computes PCA coordinates, loadings and variance decomposition.
    Uses the implementation of *scikit-learn* [Pedregosa11]_.

    .. versionchanged:: 1.5.0

        In previous versions, computing a PCA on a sparse matrix would make a dense copy of
        the array for mean centering.
        As of scanpy 1.5.0, mean centering is implicit.
        While results are extremely similar, they are not exactly the same.
        If you would like to reproduce the old results, pass a dense array.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    n_comps
        Number of principal components to compute. Defaults to 50, or 1 - minimum
        dimension size of selected representation.
    zero_center
        If `True`, compute standard PCA from covariance matrix.
        If `False`, omit zero-centering variables
        (uses :class:`~sklearn.decomposition.TruncatedSVD`),
        which allows to handle sparse input efficiently.
        Passing `None` decides automatically based on sparseness of the data.
    svd_solver
        SVD solver to use:

        `'arpack'` (the default)
          for the ARPACK wrapper in SciPy (:func:`~scipy.sparse.linalg.svds`)
        `'randomized'`
          for the randomized algorithm due to Halko (2009).
        `'auto'`
          chooses automatically depending on the size of the problem.
        `'lobpcg'`
          An alternative SciPy solver.

        .. versionchanged:: 1.4.5
           Default value changed from `'auto'` to `'arpack'`.

        Efficient computation of the principal components of a sparse matrix
        currently only works with the `'arpack`' or `'lobpcg'` solvers.

    random_state
        Change to use different initial states for the optimization.
    return_info
        Only relevant when not passing an :class:`~anndata.AnnData`:
        see “**Returns**”.
    use_highly_variable
        Whether to use highly variable genes only, stored in
        `.var['highly_variable']`.
        By default uses them if they have been determined beforehand.
    dtype
        Numpy data type string to which to convert the result.
    copy
        If an :class:`~anndata.AnnData` is passed, determines whether a copy
        is returned. Is ignored otherwise.
    chunked
        If `True`, perform an incremental PCA on segments of `chunk_size`.
        The incremental PCA automatically zero centers and ignores settings of
        `random_seed` and `svd_solver`. If `False`, perform a full PCA.
    chunk_size
        Number of observations to include in each chunk.
        Required if `chunked=True` was passed.

    Returns
    -------
    X_pca : :class:`~scipy.sparse.spmatrix`, :class:`~numpy.ndarray`
        If `data` is array-like and `return_info=False` was passed,
        this function only returns `X_pca`…
    adata : anndata.AnnData
        …otherwise if `copy=True` it returns or else adds fields to `adata`:

        `.obsm['X_pca']`
             PCA representation of data.
        `.varm['PCs']`
             The principal components containing the loadings.
        `.uns['pca']['variance_ratio']`
             Ratio of explained variance.
        `.uns['pca']['variance']`
             Explained variance, equivalent to the eigenvalues of the
             covariance matrix.
    zcomputing PCA>   
randomizedautozNote that scikit-learn's randomized PCA might not be exactly reproducible across different computational platforms. For exact reproducibility, choose `svd_solver='arpack'.`)r   TZhighly_variablezDid not find adata.var['highly_variable']. Either your data already only consists of highly-variable genes or consider running `pp.highly_variable_genes` first.NFz    on highly variable genesr   z    with n_comps=r   z.Ignoring zero_center, random_state, svd_solverr   )IncrementalPCA)n_componentsr    )PCAz{svd_solver 'randomized' does not work with sparse input. Densifying the array. This may take a very large amount of memory.)r#   r   r   r!   >   lobpcgr   zksvd_solver: {svd_solver} can not be used with sparse input.
Use "arpack" (the default) or "lobpcg" instead.)solverr   X_pca)r#   r   
componentsvariancevariance_ratio)TruncatedSVDa      without zero-centering: 
    the explained variance does not correspond to the exact statistical defintion
    the first component, e.g., might be heavily influenced by different means
    the following components often resemble the exact PCA very closely)r#   r   	algorithmz0This shouldn't happen. Please open a bug report.pca)r   r   params)shapeZPCsz    finished)timezand added
    'X_pca', the PCA coordinates (adata.obs)
    'PC1', 'PC2', ..., the loadings (adata.var)
    'pca_variance', the variance / eigenvalues (adata.uns)
    'pca_variance_ratio', the variance ratio (adata.uns))*logginfo
isinstancer   r   r   varkeys
ValueErrorminZn_varsZn_obsr   ZN_PCSr	   XdebugZsklearn.decompositionr"   npZzerosr/   Z	chunked_Xr   ZtoarrayZpartial_fitZ	transformr$   warningZfit_transform_pca_with_sparseZcomponents_Zexplained_variance_Zexplained_variance_ratio_r+   	ExceptiondescrZastypeZobsmZunsZvarmT)r   r   r   r   r   r   r   r   r   r   r   Z
logg_startZdata_is_AnnDataZadataZ
adata_compZmin_dimr8   r"   r'   Zpca_chunk_startendr$   outputr+    rE   R/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/scanpy/preprocessing/_pca.pyr-      s    c






  

   

  


r-   c                    s  t |}tj|  tjt| j}t| ddgd} |d kr`| 	dj
 d d d f }|j|jj|jj| j| j j  t| jd d d d f jfdd}fdd} fd	d
}fdd}	t|| j|| j||	d}
t|
|||d\}}}t||\}}t| }||d d f }|| d d |f }|| d | jd d  }t| d  }|| }||||d}|S )NZcsrZcsc)Zaccept_sparser   c                    s    | |  S NrE   x)XdotmdotrE   rF   matvec  s    z _pca_with_sparse.<locals>.matvecc                    s    | |  S rG   rE   rH   )XmatmmatrE   rF   matmat  s    z _pca_with_sparse.<locals>.matmatc                    s    | |  S rG   rE   rH   )XHdotmhdotonesrE   rF   rmatvec   s    z!_pca_with_sparse.<locals>.rmatvecc                    s    | |  S rG   rE   rH   )XHmatmhmatrR   rE   rF   rmatmat#  s    z!_pca_with_sparse.<locals>.rmatmat)rL   r   rO   r/   rS   rV   )r&   kZv0r   r   )r'   r)   r*   r(   )r	   r:   randomZ	set_stateZ	get_stateZrandr7   r/   r   ZmeanAflattendotr?   ZconjrR   r   r   r   r
   Zargsortr   sum)r8   Znpcsr&   mur   Zrandom_initrL   rO   rS   rV   ZXLusvidxr'   ZevZ	total_varZev_ratiorD   rE   )	rP   rT   rJ   rM   rK   rQ   rU   rN   rR   rF   r<     sR    	r<   )
NTr   r   FNr   FFN)r   NN)typingr   r   numpyr:   Zscipy.sparser   r   Zscipy.sparse.linalgr   r   Zsklearn.utilsr   r	   Zsklearn.utils.extmathr
   Zanndatar    r   r1   _utilsr   r   r   Zndarrayintboolstrr-   r<   rE   rE   rE   rF   <module>   sH              x