U
    ֽ|e                  
   @   s   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	 e
 dd Zej
dddejjejjejjdd	d
d Zdd Zdd Zd%ddZdd ZdddejddddfddZdd ZdddejddddfddZdd Zd&d#d$ZdS )'    N)tau_rand_int)tqdmc                 C   s    | dkrdS | dk rdS | S dS )zStandard clamping of a value into a fixed range (in this case -4.0 to
    4.0)

    Parameters
    ----------
    val: float
        The value to be clamped.

    Returns
    -------
    The clamped value, now fixed to be in the range -4.0 to 4.0.
          @g      N )valr   r   I/var/www/website-v5/atlas_env/lib/python3.8/site-packages/umap/layouts.pyclip   s
    r   zf4(f4[::1],f4[::1])T)resultdiffdim)fastmathcachelocalsc                 C   s<   d}| j d }t|D ] }| | ||  }||| 7 }q|S )zReduced Euclidean distance.

    Parameters
    ----------
    x: array of shape (embedding_dim,)
    y: array of shape (embedding_dim,)

    Returns
    -------
    The squared euclidean distance between x and y
            r   )shaperange)xyr	   r   ir
   r   r   r   rdist   s    
r   c           0      C   s(  t |jd D ]}|| |kr|| }|| }| | }|| }t||} |r\dd|t| |   }!|| t| |d  d|t| |   }"|!||  }#|!||  }$|#d|d|!   t||  |"  }%|$d|d|!   t||  |"  }&|| }'|| ||| |  |'  }(|| ||| |  |'  })|| |(|% |)|&   || |  | }*| dkrd| | t| |d  }+|+|t| | d  }+nd}+t|
D ]t},t|+||, ||,   }-|r|-td|* ||, ||,   7 }-||,  |-| 7  < |r||,  |- | 7  < q||  || 7  < t	|||  ||  }.t|.D ]}/t
|| }|| }t||} | dkrd|	 | }+|+d|  |t| | d   }+n||krqNnd}+t|
D ]B},|+dkrt|+||, ||,   }-nd	}-||,  |-| 7  < qqN||  |.||  7  < qd S )
Nr         ?   r                    @MbP?r   )numbapranger   r   pownpexpr   r   intr   )0head_embeddingtail_embeddingheadtail
n_verticesepochs_per_sampleab	rng_stategammar   
move_otheralphaepochs_per_negative_sampleepoch_of_next_negative_sampleepoch_of_next_samplendensmap_flagdens_phi_sumdens_re_sumdens_re_covdens_re_stddens_re_meandens_lambdadens_Rdens_mudens_mu_totr   jkcurrentotherdist_squaredphiZ	dphi_termZq_jkZq_kjZdrkZdrjZ	re_std_sqweight_kZweight_jZgrad_cor_coeff
grad_coeffdgrad_dn_neg_samplespr   r   r   '_optimize_layout_euclidean_single_epoch=   s    
&  

 




rH   c                 C   s   | d | d t|jD ]}|| }	|| }
| |	 }||
 }t||}dd|t||   }||	  || 7  < ||
  || 7  < ||	  |7  < ||
  |7  < q d}t|jD ]"}t||| ||   ||< qd S )Nr   r   g:0yE>)	fillr   r   sizer   r   r   r   log)r"   r#   r$   r%   r(   r)   Zre_sumZphi_sumr   r<   r=   r>   r?   r@   rA   epsilonr   r   r   -_optimize_layout_euclidean_densmap_epoch_init   s     



rM   r         @Fc           &      C   s  | j d }|}|| }| }| }tjtd|d}|dkrBi }|dkrNi }|rtjtd|d}t|d d }|d }|d }|d	 }tj|tj	d
}tj|tj	d
}|d } nHd}d}tjdtj	d
}tjdtj	d
}tjdtj	d
}tjdtj	d
}d|kr| |d< t
t|f|D ]}!|oT|d dkoT|!d t| d|d  k}"|"r|| ||||||| tt||  }#t|}$t|||d  }%nd}#d}$d}%|| ||||||||	|
|||||||!|"|||%|#|$|||| |dt|!t|   }q"| S )a2  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).
    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.
    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.
    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.
    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.
    n_epochs: int
        The number of training epochs to use in optimization.
    n_vertices: int
        The number of vertices (0-simplices) in the dataset.
    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.
    a: float
        Parameter of differentiable approximation of right adjoint functor
    b: float
        Parameter of differentiable approximation of right adjoint functor
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.
    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.
    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.
    parallel: bool (optional, default False)
        Whether to run the computation using numba parallel.
        Running in parallel is non-deterministic, and is not used
        if a random seed has been set, to ensure reproducibility.
    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.
    densmap: bool (optional, default False)
        Whether to use the density-augmented densMAP objective
    densmap_kwds: dict (optional, default None)
        Auxiliary data for densMAP
    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.
    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding
    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   parallelNmu_sumr   lambdaRmu)dtype	var_shiftr   disablefracr   )r   copyr   njitrH   rM   r   sumzerosfloat32r   r   floatsqrtvarmeandot)&r"   r#   r$   r%   n_epochsr&   r'   r(   r)   r*   r+   initial_alphanegative_sample_raterP   verbosedensmapdensmap_kwds	tqdm_kwdsr,   r   r-   r.   r/   r0   optimize_fnZdens_init_fnr;   r8   r9   r:   r3   r4   dens_var_shiftr1   r2   r6   r7   r5   r   r   r   optimize_layout_euclidean   s    N
  




rl   c           "   	   C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||f| \}}|||f| \}}|dkrtd|t|d|   d}nd}d| |d  |d  }t |D ]P}t|||  }||  ||	 7  < |
rt|||  }||  ||	 7  < q||  | | 7  < t|||  ||  } t | D ]}!t|| }|| }|||f| \}}|dkrtd|t|d|   d}n||krq8nd}|d | | |d  }t |D ]*}t|||  }||  ||	 7  < qq8||  | ||  7  < q||fS )Nr   r   r   r   r   ư>)r   r   r   r   r!   r   )"r'   r0   r$   r%   r"   r#   output_metricoutput_metric_kwdsr   r-   r,   r1   r/   r.   r*   r&   r(   r)   r+   r   r<   r=   r>   r?   dist_outputgrad_dist_output_Zrev_grad_dist_outputw_lrC   rD   rE   rF   rG   r   r   r   %_optimize_layout_generic_single_epoch  sb     
 



ru   r   c                 C   s   | j d }|}|| }| }| }tjtdd}|dkr@i }d|krR| |d< tt|f|D ]H}|||||| ||||||||||	||||
 |dt|t|   }qb| S )a	  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   NrW   r   )r   rY   r   rZ   ru   r   r   r^   )r"   r#   r$   r%   rc   r&   r'   r(   r)   r*   r+   rd   re   ro   rp   rf   ri   r,   r   r-   r.   r/   r0   rj   r1   r   r   r   optimize_layout_generic  sJ    V

rw   c           "      C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||f| \}}|| }d||	|  d   }t |
D ]B}t|||  }||  || 7  < |rz||  | | 7  < qz||  | | 7  < t|||  ||  }t |D ]} t|| }|| }|||f| \}}tt|||  d |	| d  }!| d|! d|! |	|  d   }t |
D ]*}t|||  }||  || 7  < qnq||  |||  7  < qd S )Nr   r   rn   )r   r   r   r!   r   r   r    max)"r'   r0   r$   r%   r"   r#   ro   rp   weightsigmasr   r-   r,   r1   r/   r.   r*   r&   rhosr+   r   r<   r=   r>   r?   rq   rr   rt   rC   rD   rE   rF   rG   Zw_hr   r   r   %_optimize_layout_inverse_single_epochi  sP     
 
&"
r|   c                 C   s   | j d }|}|	| }| }|	 }tjtdd}|dkr@i }d|krR| |d< tt|f|D ]J}||	|||| ||||||||||||||| |dt|t|   }qb| S )a
  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    weight: array of shape (n_1_simplices)
        The membership weights of the 1-simplices.

    sigmas:

    rhos:

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Trv   NrW   r   )r   rY   r   rZ   r|   r   r   r^   )r"   r#   r$   r%   ry   rz   r{   rc   r&   r'   r(   r)   r*   r+   rd   re   ro   rp   rf   ri   r,   r   r-   r.   r/   r0   rj   r1   r   r   r   optimize_layout_inverse  sL    `

r}   c           (      C   s  t |}|jd d d }d}|D ]}|jd |kr"|jd }q"t|tj}tjt|	d  tj	| t
|D ]t}|D ]h}||| jd k r|| | |kr|| | }|| | }| | | }|| | }t||}|dkr"d| | t||d  }||t|| d  }nd}t
|D ]} t|||  ||    }!t
| |D ]}"||" }#||#  krd  kr|"krXn nh|||"| |f }$|$dkrX|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!qX||   t|!| 7  < |r.t|||  ||    }%t
| |D ]}"||" }#||#  krjd  krj|"kr:n nh|||"| |f }$|$dkr:|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!q:||   t|%| 7  < q.|| |  || | 7  < || | dkrFt||| |  || |  }&nd}&t
|&D ]x}'t|	|| jd  }|| | }t||}|dkrd|
 | }|d| |t|| d   }n||krʐqRnd}t
|D ]} |dkrt|||  ||    }!nd	}!t
| |D ]}"||" }#||#  kr>d  kr>|"krn nh|||"| |f }$|$dkr|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!q||   t|!| 7  < q֐qR|| |  |&|| |  7  < qqzd S )
Nr   r   r   r   r   r   r   r   r   )lenr   r   arangeastypeint32randomseedabsshuffler   r   r   r   r    r!   r   )(head_embeddingstail_embeddingsheadstailsr'   r(   r)   regularisation_weights	relationsr*   r+   lambda_r   r,   r-   r.   r/   r0   r1   Zn_embeddingsZwindow_sizeZmax_n_edgesZe_p_sZembedding_orderr   mr<   r=   r>   r?   r@   rC   rD   rE   offsetZ
neighbor_mZidentified_indexZother_grad_drF   rG   r   r   r   /_optimize_layout_aligned_euclidean_single_epoch?  s    
"

(

(






(

 r   P);?V?{Gzt?c                 C   sJ  | d j d }|}tjjtjjd d d }tjjtjjd d d }tjjtjjd d d }tt|D ]J}|	|| 
tj|  |	|| 
tj |	|| 
tj qrtjtd|d}|d kri }d|kr| |d< tt|f|D ]H}|| |||||	|
|||||||||||| |dt|t|   }q| S )Nr   r   TrO   rW   r   )r   r   typedList
empty_listtypesr]   r   r~   appendr   r   rZ   r   r   r^   )r   r   r   r   rc   r'   r   r   r*   r(   r)   r+   r   rd   re   rP   rf   ri   r,   r   r-   r.   r/   r0   r   rj   r1   r   r   r   !optimize_layout_aligned_euclidean  s`    
r   )	r   r   rN   FFFNNF)
r   r   r   r   r   rN   TFNF)numpyr   r   umap.distances	distancesdist
umap.utilsr   Z	tqdm.autor   rZ   r   r   r]   intpr   rH   rM   rl   ru   	euclideanrw   r|   r}   r   r   r   r   r   r   <module>   s   


~,         
 9]
 V
  "          