U
    ս|es                     @   s  d dl Zd dlmZ d dlmZmZmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZ d dlZd dlZd dlmZ d dlZzd dlZW n& ek
r   ed ed	dY nX eejd
d  Zedk red ed	dzd dlZW n ek
r   ed Y nX G dd deZdd Zd)ddZd*ddZd+ddZ d,ddZ!dd Z"d-ddZ#d d! Z$d"d# Z%d.d%d&Z&G d'd( d(ej'j(Z)dS )/    N)UMAP)warncatch_warningsfilterwarnings)TypingError)spectral_layout)check_random_state)KDTreea  The umap.parametric_umap package requires Tensorflow > 2.0 to be installed.
    You can install Tensorflow at https://www.tensorflow.org/install
    
    or you can install the CPU version of Tensorflow using 

    pip install umap-learn[parametric_umap]

    z/umap.parametric_umap requires Tensorflow >= 2.0.   a   Global structure preservation in the umap.parametric_umap package requires 
        tensorflow_probability to be installed. You can install tensorflow_probability at
        https://www.tensorflow.org/probability, 
        
        or via

        pip install --upgrade tensorflow-probability

        Please ensure to install a version which is compatible to your tensorflow 
        installation. You can verify the correct release at 
        https://github.com/tensorflow/probability/releases.

        c                       s   e Zd Zdddddddejjjdddddddddi f fd	d
	Zd fdd	Zd fdd	Z	 fddZ
 fddZdd Zdd Zdd Zdd ZdddZ  ZS ) ParametricUMAPNTF)Zfrom_logits      ?
      r   c                    s  t  jf | || _|| _|| _|| _|| _|| _|	| _|| _	|
| _
|| _|| _dtjkrb|| _ntd d| _|| _|| _d| _|| _|dkr|rtjjd| _qtjjd| _n|| _|r|std d| _| jdk	r|jd jd	 | jkrtd
|jd jd	 | jdS )a  
        Parametric UMAP subclassing UMAP-learn, based on keras/tensorflow.
        There is also a non-parametric implementation contained within to compare
        with the base non-parametric implementation.

        Parameters
        ----------
        optimizer : tf.keras.optimizers, optional
            The tensorflow optimizer used for embedding, by default None
        batch_size : int, optional
            size of batch used for batch training, by default None
        dims :  tuple, optional
            dimensionality of data, if not flat (e.g. (32x32x3 images for ConvNet), by default None
        encoder : tf.keras.Sequential, optional
            The encoder Keras network
        decoder : tf.keras.Sequential, optional
            the decoder Keras network
        parametric_embedding : bool, optional
            Whether the embedder is parametric or non-parametric, by default True
        parametric_reconstruction : bool, optional
            Whether the decoder is parametric or non-parametric, by default False
        parametric_reconstruction_loss_fcn : bool, optional
            What loss function to use for parametric reconstruction, by default tf.keras.losses.BinaryCrossentropy
        parametric_reconstruction_loss_weight : float, optional
            How to weight the parametric reconstruction loss relative to umap loss, by default 1.0
        autoencoder_loss : bool, optional
            [description], by default False
        reconstruction_validation : array, optional
            validation X data for reconstruction loss, by default None
        loss_report_frequency : int, optional
            how many times per epoch to report loss, by default 1
        n_training_epochs : int, optional
            number of epochs to train for, by default 1
        global_correlation_loss_weight : float, optional
            Whether to additionally train on correlation of global pairwise relationships (>0), by default 0
        run_eagerly : bool, optional
            Whether to run tensorflow eagerly
        keras_fit_kwargs : dict, optional
            additional arguments for model.fit (like callbacks), by default {}
        tensorflow_probabilityztensorflow_probability not installed or incompatible to current                 tensorflow installation. Setting global_correlation_loss_weight to zero.r   NMbP?g?zpParametric decoding is not implemented with nonparametric             embedding. Turning off parametric decodingFzNDimensionality of embedder network output ({}) doesnot match n_components ({}))super__init__dimsencoderdecoderparametric_embeddingparametric_reconstruction"parametric_reconstruction_loss_fcn%parametric_reconstruction_loss_weightrun_eagerlyautoencoder_loss
batch_sizeloss_report_frequencysysmodulesglobal_correlation_loss_weightr   reconstruction_validationkeras_fit_kwargsparametric_modeln_training_epochstfkeras
optimizersZAdam	optimizeroutputsshapen_components
ValueErrorformat)selfr*   r   r   r   r   r   r   r   r   r   r#   r   r&   r"   r   r$   kwargs	__class__ Q/var/www/website-v5/atlas_env/lib/python3.8/site-packages/umap/parametric_umap.pyr   ?   sX    >
 zParametricUMAP.__init__c                    s@   | j dkr.|d krtd|| _t ||S t ||S d S NprecomputedzTPrecomputed distances must be supplied if metric                     is precomputed.)metricr.   _Xr   fitr0   XyZprecomputed_distancesr2   r4   r5   r:      s    
zParametricUMAP.fitc                    s@   | j dkr.|d krtd|| _t ||S t ||S d S r6   )r8   r.   r9   r   fit_transformr;   r2   r4   r5   r>      s    
zParametricUMAP.fit_transformc                    s:   | j r"| jjt|| j| jdS td t 	|S dS )aw  Transform X into the existing embedded space and return that
        transformed output.
        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            New data to be transformed.
        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Embedding of the new data in low-dimensional space.
        r   verbosez_Embedding new data is not supported by ParametricUMAP.                 Using original embedder.N)
r   r   predictnp
asanyarrayr   r@   r   r   	transformr0   r<   r2   r4   r5   rD      s      zParametricUMAP.transformc                    s2   | j r"| jjt|| j| jdS t |S dS )a   "Transform X in the existing embedded space back into the input
        data space and return that transformed output.
        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            New points to be inverse transformed.
        Returns
        -------
        X_new : array, shape (n_samples, n_features)
            Generated data points new data in data space.
        r?   N)	r   r   rA   rB   rC   r   r@   r   inverse_transformrE   r2   r4   r5   rF      s      z ParametricUMAP.inverse_transformc           
      C   s  i }| j rtjjj| jdd}tjjj| jdd}||g}| |}| |}| jr| jrf| 	|}n| 	t
|}tjjjdd dd|}||d< ntjjjdtjd	d
}tt| j|d }tt| j|d }| |dddddf }| |dddddf }|g}tj||gdd}	tjjjdd dd|	}	|	|d< | jdkr|tjjjdd dd||d< t||d| _dS )zDefine the model in kerasto_x)r,   namefrom_xc                 S   s   | S Nr4   xr4   r4   r5   <lambda>      z.ParametricUMAP._define_model.<locals>.<lambda>reconstruction)rH   r   batch_sample)r,   dtyperH   r   Nr   axisc                 S   s   | S rJ   r4   rK   r4   r4   r5   rM   5  rN   umapc                 S   s   | S rJ   r4   rK   r4   r4   r5   rM   <  rN   global_correlation)inputsr+   )r   r'   r(   layersInputr   r   r   r   r   Zstop_gradientLambdaint32squeezegatherheadtailconcatr"   GradientClippedModelr%   )
r0   r+   rG   rI   rV   embedding_toembedding_fromZembedding_to_reconrP   Zembedding_to_fromr4   r4   r5   _define_model  sR    

 
   zParametricUMAP._define_modelc                 C   s   i }i }t | j| j| j| j| j| j}||d< d|d< | jdkrjt|d< | j|d< | j	dkrjt
d d| _	| jr| j|d< | j|d< | jj| j||| j	d	 d
S )z2
        Compiles keras model with losses
        rT   r   r   rU   Fz>Setting tensorflow to run eagerly for global_correlation_loss.TrO   )r*   lossloss_weightsr   N)	umap_lossr   negative_sample_rate_a_bedge_weightr   r"   distance_loss_corrr   r   r   r   r   r%   compiler*   )r0   lossesre   umap_loss_fnr4   r4   r5   _compile_modelD  s6    




zParametricUMAP._compile_modelc              	   C   sF  | j dkr| j}| jd kr.t|d g| _n*t| jdkrXt|t|gt| j }| jrt	|dkszt
|dk rtd t|| j| j| j| j| j| j\}| _}}}| _tt|tjd| _tt|tjd| _| jrd }	n t|| j| j| j| j | jdd	}	t|}
t| j| j | j| j|
| j| j|	\| _| _ | !  | "  | jrvt#|| j | j$ }nd
}| jr| j%d k	rt| jdkrt| j%t| j%gt| j | _%| j%t&| j%fd| j%if}nd }| j'j(|f| j$| j) |d
|d| j*}|j+| _,| jr.| jj-|| j.d}n| jj/d 0 }|i fS )Nr7   r   r   r           zMData should be scaled to the range 0-1 for cross-entropy reconstruction loss.r   spectral)initd   rO   )Zepochssteps_per_epochZmax_queue_sizevalidation_data)r@   )1r8   r9   r   rB   r,   lenreshapelistr   maxminr   construct_edge_datasetgraph_n_epochsr   r   r"   rj   r'   constantexpand_dimsastypeint64r]   r^   init_embedding_from_graphr-   random_state_metric_kwdsprepare_networksr   r   rc   ro   intr   r#   
zeros_liker%   r:   r&   r$   historyZ_historyrA   r@   trainable_variablesnumpy)r0   r<   r}   rr   r   edge_datasetZn_edgesr]   r^   init_embeddingn_datart   ru   r   	embeddingr4   r4   r5   _fit_embed_datai  s    

"


	zParametricUMAP._fit_embed_datac                 C   s   t dd | j D S )Nc                 s   s,   | ]$\}}t ||r|d kr||fV  qdS ))r*   r   r   r%   N)should_pickle).0kvr4   r4   r5   	<genexpr>  s   
 z.ParametricUMAP.__getstate__.<locals>.<genexpr>)dict__dict__items)r0   r4   r4   r5   __getstate__  s    zParametricUMAP.__getstate__c              
   C   s  | j d k	r6tj|d}| j | |r6td| | jd k	rltj|d}| j| |rltd| | jd k	rtj|d}| j| |rtd| t	 b t
d | j | _tj|d}t|d	}t| |tj W 5 Q R X |rtd
| W 5 Q R X d S )Nr   zKeras encoder model saved to {}r   zKeras decoder model saved to {}r%   zKeras full model saved to {}ignore	model.pklwbz*Pickle of ParametricUMAP model saved to {})r   ospathjoinsaveprintr/   r   r%   r   r   r*   
get_config_optimizer_dictopenpickledumpHIGHEST_PROTOCOL)r0   save_locationr@   encoder_outputdecoder_outputparametric_model_outputmodel_outputoutputr4   r4   r5   r     s.    


zParametricUMAP.save)NN)NN)T)__name__
__module____qualname__r'   r(   rm   ZBinaryCrossentropyr   r:   r>   rD   rF   rc   ro   r   r   r   __classcell__r4   r4   r2   r5   r   >   s8   ?%xr   c                 C   s   |   }|  |jd }|dkr:|jd dkr6d}nd}d|j|j|j t| k < |  ||j }|j}|j}|j}||||||fS )a=  
    gets elements of graphs, weights, and number of epochs per edge

    Parameters
    ----------
    graph_ : scipy.sparse.csr.csr_matrix
        umap graph of probabilities
    n_epochs : int
        maximum number of epochs per edge

    Returns
    -------
    graph scipy.sparse.csr.csr_matrix
        umap graph
    epochs_per_sample np.array
        number of epochs to train each sample for
    head np.array
        edge head
    tail np.array
        edge tail
    weight np.array
        edge weight
    n_vertices int
        number of verticies in graph
    r   Nr   '  i     rp   )	tocoosum_duplicatesr,   datary   floateliminate_zerosrowcol)r|   r}   graph
n_verticesepochs_per_sampler]   r^   weightr4   r4   r5   get_graph_elements  s    

r   rq   c                 C   sD  |dkrt d}t|trF|dkrF|jdd|jd |fdtj}nt|tr|dkrt| |||||d}dt	|
  }	||	 tj|jd	|jd |gd
tj }nt|}
t|
jdkr@tj|
ddjd |
jd k r<t|
}|j|
dd\}}t|dddf }|
|jd| |
jd
tj }n|
}|S )a*  Initialize embedding using graph. This is for direct embeddings.

    Parameters
    ----------
    init : str, optional
        Type of initialization to use. Either random, or spectral, by default "spectral"

    Returns
    -------
    embedding : np.array
        the initialized embedding
    Nrandomg      $g      $@r   )lowhighsizerq   )r8   metric_kwds-C6?)scaler   r   rR   )r   r   r   )r   
isinstancestruniformr,   r   rB   float32r   absry   normalarrayrv   uniquer	   querymean)	_raw_datar   r-   r   r8   r   rr   r   initialisation	expansion	init_datatreedistindnndistr4   r4   r5   r   B  sX       	
  r   r   c                 C   s   dd|| d|     S )a  
     convert distance representation into probability,
        as a function of a, b params

    Parameters
    ----------
    distances : array
        euclidean distance between two points in embedding
    a : float, optional
        parameter based on min_dist, by default 1.0
    b : float, optional
        parameter based on min_dist, by default 1.0

    Returns
    -------
    float
        probability in embedding space
    r   r   r4   )	distancesabr4   r4   r5   convert_distance_to_probability|  s    r   r   c                 C   sV   |  t jt ||d }d|   t jt d| |d | }|| }|||fS )a  
    Compute cross entropy between low and high probability

    Parameters
    ----------
    probabilities_graph : array
        high dimensional probabilities
    probabilities_distance : array
        low dimensional probabilities
    EPS : float, optional
        offset to to ensure log is taken of a positive number, by default 1e-4
    repulsion_strength : float, optional
        strength of repulsion between negative samples, by default 1.0

    Returns
    -------
    attraction_term: tf.float32
        attraction term for cross entropy loss
    repellant_term: tf.float32
        repellant term for cross entropy loss
    cross_entropy: tf.float32
        cross entropy umap loss

    r   )r'   mathlogclip_by_value)probabilities_graphprobabilities_distanceEPSrepulsion_strengthZattraction_termZrepellant_termZCEr4   r4   r5   compute_cross_entropy  s    
r   c                    s6   st |d tj fdd}|S )a  
    Generate a keras-ccompatible loss function for UMAP loss

    Parameters
    ----------
    batch_size : int
        size of mini-batches
    negative_sample_rate : int
        number of negative samples per positive samples to train on
    _a : float
        distance parameter in embedding space
    _b : float float
        distance parameter in embedding space
    edge_weights : array
        weights of all edges from sparse UMAP graph
    parametric_embedding : bool
        whether the embeddding is parametric or nonparametric
    repulsion_strength : float, optional
        strength of repulsion vs attraction for cross-entropy, by default 1.0

    Returns
    -------
    loss : function
        loss function that takes in a placeholder (0) and the output of the keras network
    r   c              
      s   t j|ddd\}}t j|dd}t j|dd}t |t jt t |d }t jt j	|| ddt j	|| ddgdd}t
| }t jt t  gdd}	t|	|d\}
}}s| }t |S )Nr   r   )Znum_or_size_splitsrS   r   rR   )r   )r'   splitrepeatr\   r   shuffleranger,   r_   normr   oneszerosr   reduce_mean)Zplaceholder_yZembed_to_fromra   rb   Zembedding_neg_toZ
repeat_negZembedding_neg_fromZdistance_embeddingr   r   Zattraction_lossZrepellant_lossZce_lossrh   ri   r   rg   r   r   Zweights_tiledr4   r5   rd     sD      
 	   zumap_loss.<locals>.loss)rB   tiler'   function)r   rg   rh   ri   Zedge_weightsr   r   rd   r4   r   r5   rf     s
    #,rf   c                 C   s   t jj | } t jj |}dd }|| } ||}t | dd} t |dd}t jj| dd | dd  dd}t jj|dd |dd  dd}|t j|j	d	  }t 
tjjt |dt |dd
}t j|rtd| S )z6Loss based on the distance between elements in a batchc                 S   s   | t |  t j|  S rJ   )r'   r   r   Z
reduce_stdrK   r4   r4   r5   z_score  s    z#distance_loss_corr.<locals>.z_scoreir   r   Nr   rR   g|=)rL   r=   z%NaN values found in correlation loss.)r'   r(   rW   Flattenr   r   Zreduce_euclidean_normr   r   r,   r[   r   statscorrelationr   is_nanr.   )rL   Zz_xr   dxdzZcorr_dr4   r4   r5   rk     s&    $$
 
rk   c           	      C   s2  |rr| dkrt jt jjj|dt jj t jjjdddt jjjdddt jjjdddt jjj|ddg} n:t jjj||dd	}|jd
d |	|g t j|g} |dkr*|r*t jt jjj|dt jjjdddt jjjdddt jjjdddt jjjt
|dddt jj|g}| |fS )a  
    Generates a set of keras networks for the encoder and decoder if one has not already
    been predefined.

    Parameters
    ----------
    encoder : tf.keras.Sequential
        The encoder Keras network
    decoder : tf.keras.Sequential
        the decoder Keras network
    n_components : int
        the dimensionality of the latent space
    dims : tuple of shape (dim1, dim2, dim3...)
        dimensionality of data
    n_data : number of elements in dataset
        # of elements in training dataset
    parametric_embedding : bool
        Whether the embedder is parametric or non-parametric
    parametric_reconstruction : bool
        Whether the decoder is parametric or non-parametric
    init_embedding : array (optional, default None)
        The initial embedding, for nonparametric embeddings

    Returns
    -------
    encoder: tf.keras.Sequential
        encoder keras network
    decoder: tf.keras.Sequential
        decoder keras network
    N)input_shapers   Zrelu)units
activationz)r   rH   r   )Zinput_lengthr   Zrecon)r   rH   r   )r'   r(   Z
SequentialrW   Z
InputLayerr   ZDenseZ	Embeddingbuildset_weightsrB   productZReshape)	r   r   r-   r   r   r   r   r   Zembedding_layerr4   r4   r5   r   7  sF    )
  
  r   c                    s   fdd j d dkrdnd fdd}fd	d
}dd }	t||\}
}}}}}dkr|r|t|dgnt|t||dt||d }}tjt	t|}|| tj
}|| tj
}|rJtjj||f}| }|d}|jdd}|j|tjjjd}|j|tjjjd}|d}n2|	 }tjjj|tjtjftdtdfd}|t||||fS )a  
    Construct a tf.data.Dataset of edges, sampled by edge weight.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        New data to be transformed.
    graph_ : scipy.sparse.csr.csr_matrix
        Generated UMAP graph
    n_epochs : int
        # of epochs to train each edge
    batch_size : int
        batch size
    parametric_embedding : bool
        Whether the embedder is parametric or non-parametric
    parametric_reconstruction : bool
        Whether the decoder is parametric or non-parametric
    c                    s    |  S rJ   r4   )index)r<   r4   r5   gather_index  s    z,construct_edge_dataset.<locals>.gather_indexg&.>g      ?TFc                    sV   r6t | gt jgd }t |gt jgd }nt  | }t  |}||fS )Nr   )r'   Zpy_functionr   r\   )Zedge_toZ	edge_fromedge_to_batchedge_from_batch)r<   r   gather_indices_in_pythonr4   r5   gather_X  s    z(construct_edge_dataset.<locals>.gather_Xc                    s8   dt d i}dkr | |d< r,| |d< | |f|fS )NrT   r   rU   rO   )r'   r   )r   r  r+   )r   r"   r   r4   r5   get_outputs  s    z+construct_edge_dataset.<locals>.get_outputsc                  S   s   dd } | S )z
        The sham generator is a placeholder when all data is already intrinsic to
        the model, but keras wants some input data. Used for non-parametric
        embedding.
        c                   s   s(   t jdt jdt jdt jdfV  q d S )Nr   )rQ   )r'   r   rZ   r4   r4   r4   r5   sham_generator  s    zKconstruct_edge_dataset.<locals>.make_sham_generator.<locals>.sham_generatorr4   )r  r4   r4   r5   make_sham_generator  s    z3construct_edge_dataset.<locals>.make_sham_generatorNi  r   r   )Zdrop_remainder)Znum_parallel_callsr   r   r   )output_shapes)nbytesr   rB   rz   rv   r   r   r   permutationr   r   r'   r   DatasetZfrom_tensor_slicesr   batchmapexperimentalZAUTOTUNEZprefetchZfrom_generatorrZ   ZTensorShape)r<   r|   r}   r   r   r   r"   r  r  r  r   r   r]   r^   r   r   Zedges_to_expZedges_from_expZshuffle_maskr   genr4   )r<   r   r   r  r"   r   r5   r{     sT    

 
  
r{   c                 C   s   z0t t|d }tt | d}W n tjtjj	t
tjjtjjtttfk
r } ztd| | W Y dS d}~X Y nB tk
r } z$td|  d| d|  W Y dS d}~X Y nX dS )	a  
    Checks if a dictionary item can be pickled

    Parameters
    ----------
    key : try
        key for dictionary element
    val : None
        element of dictionary

    Returns
    -------
    picklable: bool
        whether the dictionary item can be pickled
    base64zDid not pickle {}: {}FNzFailed at pickling :z due to T)codecsencoder   dumpsdecodeloadsPicklingErrorr'   errorsZInvalidArgumentError	TypeErrorInternalErrorZNotFoundErrorOverflowErrorr   AttributeErrorr   r/   r.   )keyvalpickledZ	unpickleder4   r4   r5   r     s&    
r   Tc           
      C   s.  t j| d}tt|d}|r0td| |jd }t	t
jj|}||j|_t j| d}t j|rt
jj||_|rtd| t j| d}t j|rt
jj||_td| t|j|j|j|j|j|j}t j| d	}	t j|	r*t
jjj|	d
|id|_td|	 |S )a  
    Load a parametric UMAP model consisting of a umap-learn UMAP object
    and corresponding keras models.

    Parameters
    ----------
    save_location : str
        the folder that the model was saved in
    verbose : bool, optional
        Whether to print the loading steps, by default True

    Returns
    -------
    parametric_umap.ParametricUMAP
        Parametric UMAP objects
    r   rbz-Pickle of ParametricUMAP model loaded from {}rH   r   z"Keras encoder model loaded from {}r   z"Keras decoder model loaded from {}r%   rd   )Zcustom_objectszKeras full model loaded from {})r   r   r   r   loadr   r   r/   r   getattrr'   r(   r)   Zfrom_configr*   existsmodelsZ
load_modelr   r   rf   r   rg   rh   ri   rj   r   r%   )
r   r@   r   model
class_nameZOptimizerClassr   r   rn   r   r4   r4   r5   load_ParametricUMAP#  s@    

 r'  c                   @   s   e Zd ZdZdd ZdS )r`   zg
    We need to define a custom keras model here for gradient clipping,
    to stabilize training.
    c           	   	   C   s   |\}}t  $}| |dd}| j||| jd}W 5 Q R X | j}|||}dd |D }dd |D }| jt|| | j	
|| dd | jD S )	NT)Ztraining)Zregularization_lossesc                 S   s   g | ]}t |d dqS )g      g      @)r'   r   r   gradr4   r4   r5   
<listcomp>w  s     z3GradientClippedModel.train_step.<locals>.<listcomp>c                 S   s(   g | ] }t t j|t ||qS r4   )r'   wherer   r   r   r(  r4   r4   r5   r*  x  s   c                 S   s   i | ]}|j | qS r4   )rH   result)r   mr4   r4   r5   
<dictcomp>  s      z3GradientClippedModel.train_step.<locals>.<dictcomp>)r'   ZGradientTapeZcompiled_lossrm   r   gradientr*   Zapply_gradientszipZcompiled_metricsZupdate_statemetrics)	r0   r   rL   r=   Ztapey_predrd   Ztrainable_varsZ	gradientsr4   r4   r5   
train_stepi  s    
zGradientClippedModel.train_stepN)r   r   r   __doc__r3  r4   r4   r4   r5   r`   c  s   r`   )rq   )r   r   )r   r   )r   )N)T)*r   rB   rT   r   warningsr   r   r   numbar   r   umap.spectralr   sklearn.utilsr   r  r   sklearn.neighborsr	   r    Z
tensorflowr'   ImportErrorr   __version__r   ZTF_MAJOR_VERSIONr   r   r   r   r   r   rf   rk   r   r{   r   r'  r(   Modelr`   r4   r4   r4   r5   <module>   s`   



   Q7 
:
   
1 
W, 
Ou(
@