U
    ֽ|e[                 	   @   sR  d dl mZ d dlZd dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ zd dlZW n  ek
r   d dlmZ Y nX d dlZd dlZd dlmZmZ d dl Zd dl!Z!d dl"m#Z$ d dl%m&Z& d dl'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z6 d dl7m8Z9 e:ej;d e<ej=j>d Z?e<ej=j@d ZAdZBdZCejDZEddddddZFdd ZGdd ZHdd ZIdVd!d"ZJe!jKe!jLjMe!jLjMe!jLjMe!jLjMd#d$d%dWd(d)ZNdXd+d,ZOe!jKe!jLjMdddddf e!jLjMddd e!jLjMddd e!jLjMd-d$d$d.dYd/d0ZPi ddd d'd'd$d df	d1d2ZQe!K dZd4d5ZRe!S d6d7 ZTe!K d[d:d;ZUe!K d<d= ZVd\d>d?ZWd'd3di d'fd@dAZXd]dCdDZYdEdF ZZdGdH Z[e$j\dI i d$d d dfdJdKZ]e!K dLdM Z^dNdO Z_e!K dPdQ Z`dRdS ZaG dTdU dUe	ZbdS )^    )print_functionN)warn)	curve_fit)BaseEstimator)check_random_statecheck_array)check_is_fitted)pairwise_distances)	normalize)KDTree)joblib)triltriu)	submatrixts
csr_uniquefast_knn_indices)spectral_layout)optimize_layout_euclideanoptimize_layout_genericoptimize_layout_inverse)	NNDescent)named_distances)sparse_named_distancesC   gh㈵>MbP?   )correlationcosine	hellingerjaccarddicec                 c   s8   | D ].}t |ttfr,t|D ]
}|V  qq|V  qd S N)
isinstancelisttupleflatten_iter)	containerij r+   G/var/www/website-v5/atlas_env/lib/python3.8/site-packages/umap/umap_.pyr'   E   s
    
r'   c                 C   s   t t| S r#   )r&   r'   )r(   r+   r+   r,   	flattenedN   s    r-   c                 C   s   g }|g}i }d||< t j}|g}|r|d}|| |t jkr\t||kr\t| }|| d |k r"| | j}	|	D ]0}
|
|krz||
 ||
 || d ||
< qzq"t |S Nr   r   )	npinfpopappendlenmaxvaluesindicesarray)adjmatstartmin_verticesZexploredqueuelevelsZ	max_levelZvisitednode	neighborsZ	neighbourr+   r+   r,   breadth_first_searchR   s&    




r?   皙?Fc                 C   s   ||dk@ | dk@ r,t d| d|  d nZ|dk||| k@ r^td| d|  d| d n(||| krtd| d|  d	| d
 dS )zDA simple wrapper function to avoid large amounts of code repetition.r   zDisconnection_distance = z has removed z@ edges.  This is not a problem as no vertices were disconnected.zuA few of your vertices were disconnected from the manifold.  This shouldn't cause problems.
Disconnection_distance = z' edges.
It has only fully disconnected zC vertices.
Use umap.utils.disconnected_vertices() to identify them.z^A large number of your vertices were disconnected from the manifold.
Disconnection_distance = z" edges.
It has fully disconnected z vertices.
You might consider using find_disconnected_points() to find and remove these points from your data.
Use umap.utils.disconnected_vertices() to identify them.N)printr   )edges_removedvertices_disconnecteddisconnection_distanceZ
total_rowsZ	thresholdverboser+   r+   r,   raise_disconnected_warningl   s    	
rF   )psumlomidhiT)localsfastmath@         ?c              	   C   s:  t || }t j| jd t jd}t j| jd t jd}t | }t| jd D ]}	d}
t}d}| |	 }||dk }|jd |krtt 	|}|| }|dkr||d  ||	< |t
kr||	  ||| ||d    7  < n||d  ||	< n|jd dkrt |||	< t|D ]}d}td| jd D ]B}| |	|f ||	  }|dkrj|t ||  7 }n|d7 }q2t || t
k r q||kr|}|
| d }n$|}
|tkr|d9 }n|
| d }q|||	< ||	 dkrt |}||	 t| k r0t| ||	< qR||	 t| k rRt| ||	< qR||fS )a  Compute a continuous version of the distance to the kth nearest
    neighbor. That is, this is similar to knn-distance but allows continuous
    k values rather than requiring an integral k. In essence we are simply
    computing the distance such that the cardinality of fuzzy set we generate
    is k.

    Parameters
    ----------
    distances: array of shape (n_samples, n_neighbors)
        Distances to nearest neighbors for each samples. Each row should be a
        sorted list of distances to a given samples nearest neighbors.

    k: float
        The number of nearest neighbors to approximate for.

    n_iter: int (optional, default 64)
        We need to binary search for the correct distance value. This is the
        max number of iterations to use in such a search.

    local_connectivity: int (optional, default 1)
        The local connectivity required -- i.e. the number of nearest
        neighbors that should be assumed to be connected at a local level.
        The higher this value the more connected the manifold becomes
        locally. In practice this should be not more than the local intrinsic
        dimension of the manifold.

    bandwidth: float (optional, default 1)
        The target bandwidth of the kernel, larger values will produce
        larger return values.

    Returns
    -------
    knn_dist: array of shape (n_samples,)
        The distance to kth nearest neighbor, as suitably approximated.

    nn_dist: array of shape (n_samples,)
        The distance to the 1st nearest neighbor for each point.
    r   dtype        rN   r          @r   )r/   log2zerosshapefloat32meanrangeNPY_INFINITYintfloorSMOOTH_K_TOLERANCEr4   expfabsMIN_K_DIST_SCALE)	distanceskZn_iterlocal_connectivityZ	bandwidthtargetrhoresultZmean_distancesr)   rH   rJ   rI   Zith_distancesZnon_zero_distsindexZinterpolationnrG   r*   dZmean_ith_distancesr+   r+   r,   smooth_knn_dist   s\    0







ri   c
                 C   s   |	rt t d |dkrbt| |}
| t| jd dddf |
f  }|tjk}d|
|< d}nltddt	t
| jd d d	  }tdt	t
t| jd }t| ||||||d
|||	dd}|j\}
}|	rt t d |
||fS )aY  Compute the ``n_neighbors`` nearest points for each data point in ``X``
    under ``metric``. This may be exact, but more likely is approximated via
    nearest neighbor descent.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        The input data to compute the k-neighbor graph of.

    n_neighbors: int
        The number of nearest neighbors to compute for each sample in ``X``.

    metric: string or callable
        The metric to use for the computation.

    metric_kwds: dict
        Any arguments to pass to the metric computation function.

    angular: bool
        Whether to use angular rp trees in NN approximation.

    random_state: np.random state
        The random state to use for approximate NN computations.

    low_memory: bool (optional, default True)
        Whether to pursue lower memory NNdescent.

    verbose: bool (optional, default False)
        Whether to print status data during the computation.

    Returns
    -------
    knn_indices: array of shape (n_samples, n_neighbors)
        The indices on the ``n_neighbors`` closest points in the dataset.

    knn_dists: array of shape (n_samples, n_neighbors)
        The distances to the ``n_neighbors`` closest points in the dataset.

    rp_forest: list of trees
        The random projection forest used for searching (if used, None otherwise)
    zFinding Nearest Neighborsprecomputedr   Nrj   rM            ?g      4@<   F)n_neighborsmetricmetric_kwdsrandom_staten_treesn_itersZmax_candidates
low_memoryn_jobsrE   
compressedz Finished Nearest Neighbor Search)rA   r   r   r/   arangerU   copyr0   minrZ   roundr4   rS   r   neighbor_graph)Xro   rp   rq   angularrr   ru   use_pynndescentrv   rE   knn_indices	knn_distsdisconnected_indexknn_search_indexrs   rt   r+   r+   r,   nearest_neighbors   s8    5
(
$
r   )r   sigmasrhosval)rK   parallelrL   c                 C   s`  | j d }| j d }tj| jtjd}tj| jtjd}	tj| jtjd}
|rbtj| jtjd}nd}t|D ]}t|D ]}| ||f dkrqz|dk| ||f |k@ rd}nN|||f ||  dks|| dkrd}n$t|||f ||  ||   }|||| | < | ||f |	|| | < ||
|| | < |rz|||f ||| | < qzqn||	|
|fS )	a3  Construct the membership strength data for the 1-skeleton of each local
    fuzzy simplicial set -- this is formed as a sparse matrix where each row is
    a local fuzzy simplicial set, with a membership strength for the
    1-simplex to each other data point.

    Parameters
    ----------
    knn_indices: array of shape (n_samples, n_neighbors)
        The indices on the ``n_neighbors`` closest points in the dataset.

    knn_dists: array of shape (n_samples, n_neighbors)
        The distances to the ``n_neighbors`` closest points in the dataset.

    sigmas: array of shape(n_samples)
        The normalization factor derived from the metric tensor approximation.

    rhos: array of shape(n_samples)
        The local connectivity adjustment.

    return_dists: bool (optional, default False)
        Whether to return the pairwise distance associated with each edge

    bipartite: bool (optional, default False)
        Does the nearest neighbour set represent a bipartite graph?  That is are the
        nearest neighbour indices from the same point set as the row indices?

    Returns
    -------
    rows: array of shape (n_samples * n_neighbors)
        Row data for the resulting sparse matrix (coo format)

    cols: array of shape (n_samples * n_neighbors)
        Column data for the resulting sparse matrix (coo format)

    vals: array of shape (n_samples * n_neighbors)
        Entries for the resulting sparse matrix (coo format)

    dists: array of shape (n_samples * n_neighbors)
        Distance associated with each entry in the resulting sparse matrix
    r   r   rO   Nrj   FrQ   rN   )rU   r/   rT   sizeint32rV   rX   r]   )r   r   r   r   return_dists	bipartite	n_samplesro   rowscolsvalsdistsr)   r*   r   r+   r+   r,   compute_membership_strengths]  s.    :

$$r   c              	   C   s<  |dks|dkr,t | ||||||d\}}}|tj}t|t|t|	d\}}t|||||\}}}}tjj	|||ff| j
d | j
d fd}|  |
r| }||}||| |  d| |  }|  |dkr|||fS |r(tjj	|||ff| j
d | j
d fd}||  }nd}||||fS dS )aD  Given a set of data X, a neighborhood size, and a measure of distance
    compute the fuzzy simplicial set (here represented as a fuzzy graph in
    the form of a sparse matrix) associated to the data. This is done by
    locally approximating geodesic distance at each point, creating a fuzzy
    simplicial set for each such point, and then combining all the local
    fuzzy simplicial sets into a global one via a fuzzy union.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        The data to be modelled as a fuzzy simplicial set.

    n_neighbors: int
        The number of neighbors to use to approximate geodesic distance.
        Larger numbers induce more global estimates of the manifold that can
        miss finer detail, while smaller values will focus on fine manifold
        structure to the detriment of the larger picture.

    random_state: numpy RandomState or equivalent
        A state capable being used as a numpy random state.

    metric: string or function (optional, default 'euclidean')
        The metric to use to compute distances in high dimensional space.
        If a string is passed it must match a valid predefined metric. If
        a general metric is required a function that takes two 1d arrays and
        returns a float can be provided. For performance purposes it is
        required that this be a numba jit'd function. Valid string metrics
        include:
            * euclidean (or l2)
            * manhattan (or l1)
            * cityblock
            * braycurtis
            * canberra
            * chebyshev
            * correlation
            * cosine
            * dice
            * hamming
            * jaccard
            * kulsinski
            * ll_dirichlet
            * mahalanobis
            * matching
            * minkowski
            * rogerstanimoto
            * russellrao
            * seuclidean
            * sokalmichener
            * sokalsneath
            * sqeuclidean
            * yule
            * wminkowski

        Metrics that take arguments (such as minkowski, mahalanobis etc.)
        can have arguments passed via the metric_kwds dictionary. At this
        time care must be taken and dictionary elements must be ordered
        appropriately; this will hopefully be fixed in the future.

    metric_kwds: dict (optional, default {})
        Arguments to pass on to the metric, such as the ``p`` value for
        Minkowski distance.

    knn_indices: array of shape (n_samples, n_neighbors) (optional)
        If the k-nearest neighbors of each point has already been calculated
        you can pass them in here to save computation time. This should be
        an array with the indices of the k-nearest neighbors as a row for
        each data point.

    knn_dists: array of shape (n_samples, n_neighbors) (optional)
        If the k-nearest neighbors of each point has already been calculated
        you can pass them in here to save computation time. This should be
        an array with the distances of the k-nearest neighbors as a row for
        each data point.

    angular: bool (optional, default False)
        Whether to use angular/cosine distance for the random projection
        forest for seeding NN-descent to determine approximate nearest
        neighbors.

    set_op_mix_ratio: float (optional, default 1.0)
        Interpolate between (fuzzy) union and intersection as the set operation
        used to combine local fuzzy simplicial sets to obtain a global fuzzy
        simplicial sets. Both fuzzy set operations use the product t-norm.
        The value of this parameter should be between 0.0 and 1.0; a value of
        1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy
        intersection.

    local_connectivity: int (optional, default 1)
        The local connectivity required -- i.e. the number of nearest
        neighbors that should be assumed to be connected at a local level.
        The higher this value the more connected the manifold becomes
        locally. In practice this should be not more than the local intrinsic
        dimension of the manifold.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    return_dists: bool or None (optional, default None)
        Whether to return the pairwise distance associated with each edge.

    Returns
    -------
    fuzzy_simplicial_set: coo_matrix
        A fuzzy simplicial set represented as a sparse matrix. The (i,
        j) entry of the matrix represents the membership strength of the
        1-simplex between the ith and jth sample points.
    NrE   rb   r   rU   rN   )r   astyper/   rV   ri   floatr   scipysparse
coo_matrixrU   eliminate_zeros	transposemultiplyZmaximumZtodok)r}   ro   rr   rp   rq   r   r   r~   set_op_mix_ratiorb   Zapply_set_operationsrE   r   _r   r   r   r   r   r   re   r   prod_matrixdmatr+   r+   r,   fuzzy_simplicial_set  s\    z

    
 



 r         @c           	      C   s   t | jd D ]n}| | }|| }|| dks:|| dkrT||  t| 9  < q|| || kr||  t| 9  < qdS )a9  Under the assumption of categorical distance for the intersecting
    simplicial set perform a fast intersection.

    Parameters
    ----------
    rows: array
        An array of the row of each non-zero in the sparse matrix
        representation.

    cols: array
        An array of the column of each non-zero in the sparse matrix
        representation.

    values: array
        An array of the value of each non-zero in the sparse matrix
        representation.

    target: array of shape (n_samples)
        The categorical labels to use in the intersection.

    unknown_dist: float (optional, default 1.0)
        The distance an unknown label (-1) is assumed to be from any point.

    far_dist float (optional, default 5.0)
        The distance between unmatched labels.

    Returns
    -------
    None
    r   rj   NrX   rU   r/   r]   )	r   r   r5   rc   unknown_distfar_distnzr)   r*   r+   r+   r,   fast_intersectioni  s     r   c                 C   s\   t | jd D ]H}| | }|| }	||| ||	 f| }
||  t||
  9  < qdS )a)  Under the assumption of categorical distance for the intersecting
    simplicial set perform a fast intersection.

    Parameters
    ----------
    rows: array
        An array of the row of each non-zero in the sparse matrix
        representation.

    cols: array
        An array of the column of each non-zero in the sparse matrix
        representation.

    values: array of shape
        An array of the values of each non-zero in the sparse matrix
        representation.

    discrete_space: array of shape (n_samples, n_features)
        The vectors of categorical labels to use in the intersection.

    metric: numba function
        The function used to calculate distance over the target array.

    scale: float
        A scaling to apply to the metric.

    Returns
    -------
    None
    r   Nr   )r   r   r5   discrete_spacerp   Zmetric_argsscaler   r)   r*   distr+   r+   r,   fast_metric_intersection  s    "r          c           
      C   s   t |}d}t}d}t|D ]}d}t| jd D ]}	|t| |	 |7 }q4t || tk rb q||k r||}|| d }q|}|tkr|d9 }q|| d }qt | |S )NrQ   rN   r   rR   r   )	r/   rS   rY   rX   rU   powr^   r\   power)
Zprobabilitiesra   rt   rc   rH   rJ   rI   rg   rG   r*   r+   r+   r,   reprocess_row  s$    

r   c                 C   sL   t | jd d D ]4}t|| | | |d   || | | |d  < qd S r.   )rX   rU   r   )Zsimplicial_set_indptrZsimplicial_set_datar)   r+   r+   r,   reset_local_metrics  s     
r   c                 C   sX   t | dd} |r.|  } t| j| j |  } |  }| |}| | | } |   | S )aW  Reset the local connectivity requirement -- each data sample should
    have complete confidence in at least one 1-simplex in the simplicial set.
    We can enforce this by locally rescaling confidences, and then remerging the
    different local simplicial sets together.

    Parameters
    ----------
    simplicial_set: sparse matrix
        The simplicial set for which to recalculate with respect to local
        connectivity.

    Returns
    -------
    simplicial_set: sparse_matrix
        The recalculated simplicial set, now with the local connectivity
        assumption restored.
    r4   Znorm)	r
   tocsrr   indptrdatatocoor   r   r   )simplicial_setZreset_local_metricr   r   r+   r+   r,   reset_local_connectivity  s    
r   c              	   C   sz   |   } |dk	rR|tjkr&tj| }ntdt| j| j| j||t|	 | nt
| j| j| j||| |   t| S )a  Combine a fuzzy simplicial set with another fuzzy simplicial set
    generated from discrete metric data using discrete distances. The target
    data is assumed to be categorical label data (a vector of labels),
    and this will update the fuzzy simplicial set to respect that label data.

    TODO: optional category cardinality based weighting of distance

    Parameters
    ----------
    simplicial_set: sparse matrix
        The input fuzzy simplicial set.

    discrete_space: array of shape (n_samples)
        The categorical labels to use in the intersection.

    unknown_dist: float (optional, default 1.0)
        The distance an unknown label (-1) is assumed to be from any point.

    far_dist: float (optional, default 5.0)
        The distance between unmatched labels.

    metric: str (optional, default None)
        If not None, then use this metric to determine the
        distance between values.

    metric_scale: float (optional, default 1.0)
        If using a custom metric scale the distance values by
        this value -- this controls the weighting of the
        intersection. Larger values weight more toward target.

    Returns
    -------
    simplicial_set: sparse matrix
        The resulting intersected fuzzy simplicial set.
    Nz.Discrete intersection metric is not recognized)r   r   r   
ValueErrorr   rowcolr   r&   r5   r   r   r   )r   r   r   r   rp   
metric_kwsmetric_scaleZmetric_funcr+   r+   r,   +discrete_metric_simplicial_set_intersection  s0    ,


	r   rm   c                 C   s`   |r|   }n| |   }|  }| }tj|j|j|j|j|j|j|j|j|j||d |S )N)Z
mix_weightright_complement)	r   r   r   Zgeneral_sset_intersectionr   r6   r   r   r   )simplicial_set1simplicial_set2weightr   re   leftrightr+   r+   r,   #general_simplicial_set_intersectionU  s&    
r   c                 C   sL   | |   }|  }| }t|j|j|j|j|j|j|j|j|j	 |S r#   )	r   r   r   Zgeneral_sset_unionr   r6   r   r   r   )r   r   re   r   r   r+   r+   r,   general_simplicial_set_unionq  s    r   c                 C   sJ   dt j| jd t jd }|| |    }t|||dk  ||dk< |S )a  Given a set of weights and number of epochs generate the number of
    epochs per sample for each weight.

    Parameters
    ----------
    weights: array of shape (n_1_simplices)
        The weights ofhow much we wish to sample each 1-simplex.

    n_epochs: int
        The total number of epochs we want to train for.

    Returns
    -------
    An array of number of epochs per sample, one for each 1-simplex.
    g      r   rO   )r/   ZonesrU   Zfloat64r4   r   )weightsn_epochsre   r   r+   r+   r,   make_epochs_per_sample  s    r   	euclideanc           8      C   s.  |  }|  |jd }|jd dkr.d}nd}|r>|d7 }|dkrJ|}|dkrrd|j|j|j t| k < nd|j|j|j t| k < |  t|	tr|	d	kr|
j	d
d|jd |fd
tj}nt|	tr<|	dkr<t| |||
||d}dt|  }|| 
tj|
jd|jd |gd
tj }nt|	}t|jdkrtj|ddjd |jd k rt|}|j|dd\}}t|dddf }||
jd| |jd
tj }n|}t|j|} |j}!|j}"|j}#|
ttd
tj}$i }%|s|r6|r$tt d  |d }&tj|tjd}'tj|tjd}(t t|!D ]})|!|) }*|"|) }+|&|*|+f |&|*|+f  },|j|) }-|(|*  |-|, 7  < |(|+  |-|, 7  < |'|*  |-7  < |'|+  |-7  < qXd}.t!|.|(|'  }(|r(|(t|( t"|( }/|j|d< |'|d< |/|d< |r6|(|%d< d|t#|d  t|dt#|d  j
tjdd}|rt$|||!|"||| |||$||||||||d d!}n4t%|||!|"||| |||$||||t&|' ||d d"}|r&|rtt d#  t(||d$ d%i d&|
|d'\}0}1}2t)||d$ |
d%i |0|1|d d(	\}3}4}5}6|3  }3|3  |3  |3jd }tj|tjd}'tj|tjd}7|3j}!|3j}"t t|!D ]t})|!|) }*|"|) }+|6|*|+f },|3j|) }-|7|*  |-|, 7  < |7|+  |-|, 7  < |'|*  |-7  < |'|+  |-7  < qd}.t!|.|7|'  }7|7|%d)< ||%fS )*a]  Perform a fuzzy simplicial set embedding, using a specified
    initialisation method and then minimizing the fuzzy set cross entropy
    between the 1-skeletons of the high and low dimensional fuzzy simplicial
    sets.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data to be embedded by UMAP.

    graph: sparse matrix
        The 1-skeleton of the high dimensional fuzzy simplicial set as
        represented by a graph for which we require a sparse matrix for the
        (weighted) adjacency matrix.

    n_components: int
        The dimensionality of the euclidean space into which to embed the data.

    initial_alpha: float
        Initial learning rate for the SGD.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    gamma: float
        Weight to apply to negative samples.

    negative_sample_rate: int (optional, default 5)
        The number of negative samples to select per positive sample
        in the optimization process. Increasing this value will result
        in greater repulsive force being applied, greater optimization
        cost, but slightly more accuracy.

    n_epochs: int (optional, default 0)
        The number of training epochs to be used in optimizing the
        low dimensional embedding. Larger values result in more accurate
        embeddings. If 0 is specified a value will be selected based on
        the size of the input dataset (200 for large datasets, 500 for small).

    init: string
        How to initialize the low dimensional embedding. Options are:
            * 'spectral': use a spectral embedding of the fuzzy 1-skeleton
            * 'random': assign initial embedding positions at random.
            * A numpy array of initial embedding positions.

    random_state: numpy RandomState or equivalent
        A state capable being used as a numpy random state.

    metric: string or callable
        The metric used to measure distance in high dimensional space; used if
        multiple connected components need to be layed out.

    metric_kwds: dict
        Key word arguments to be passed to the metric function; used if
        multiple connected components need to be layed out.

    densmap: bool
        Whether to use the density-augmented objective function to optimize
        the embedding according to the densMAP algorithm.

    densmap_kwds: dict
        Key word arguments to be used by the densMAP optimization.

    output_dens: bool
        Whether to output local radii in the original data and the embedding.

    output_metric: function
        Function returning the distance between two points in embedding space and
        the gradient of the distance wrt the first argument.

    output_metric_kwds: dict
        Key word arguments to be passed to the output_metric function.

    euclidean_output: bool
        Whether to use the faster code specialised for euclidean output metrics

    parallel: bool (optional, default False)
        Whether to run the computation using numba parallel.
        Running in parallel is non-deterministic, and is not used
        if a random seed has been set, to ensure reproducibility.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict
        Key word arguments to be used by the tqdm progress bar.

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized of ``graph`` into an ``n_components`` dimensional
        euclidean space.

    aux_data: dict
        Auxiliary output returned with the embedding. When densMAP extension
        is turned on, this dictionary includes local radii in the original
        data (``rad_orig``) and in the embedding (``rad_emb``).
    r   r   '  i     N
   rQ   randomg      $g      $@lowhighr   spectral)rp   rq   g-C6?)r   r   r   axis)ra   r      z Computing original densitiesgraph_distsrO   g:0yE>mumu_sumRrad_origr   orderT)r   rE   densmapdensmap_kwds	tqdm_kwds
move_other)rE   r   r   z Computing embedding densitiesro   r   Fr   )rE   r   rad_emb)*r   Zsum_duplicatesrU   r   r4   r   r   r$   struniformr   r/   rV   r   absZnormalr7   r3   uniquer   queryrW   r   r   r   randint	INT32_MIN	INT32_MAXint64rA   r   rT   rX   logZstdrz   r   r   r&   r5   r   r   )8r   graphn_componentsZinitial_alphaabgammanegative_sample_rater   initrr   rp   rq   r   r   output_densoutput_metricoutput_metric_kwdsZeuclidean_outputr   rE   r   Z
n_verticesZdefault_epochs	embeddingZinitialisationZ	expansionZ	init_datatreer   ZindZnndistepochs_per_sampleheadtailr   	rng_stateaux_datar   r   ror)   r*   ra   Dr   epsilonr   r   r   Z	rp_forestZ	emb_graphZ
emb_sigmasZemb_rhosZ	emb_distsrer+   r+   r,   simplicial_set_embedding  sh   }
    
  

 



r  c                 C   s   t j| jd |jd ft jd}t| jd D ]\}t| jd D ]H}t|jd D ]4}|||f  |||f || ||f |f  7  < qRq@q.|S )a  Given indices and weights and an original embeddings
    initialize the positions of new points relative to the
    indices and weights (of their neighbors in the source data).

    Parameters
    ----------
    indices: array of shape (n_new_samples, n_neighbors)
        The indices of the neighbors of each new sample

    weights: array of shape (n_new_samples, n_neighbors)
        The membership strengths of associated 1-simplices
        for each of the new samples.

    embedding: array of shape (n_samples, dim)
        The original embedding of the source data.

    Returns
    -------
    new_embedding: array of shape (n_new_samples, dim)
        An initial embedding of the new sample points.
    r   r   rO   )r/   rT   rU   rV   rX   )r6   r   r   re   r)   r*   rh   r+   r+   r,   init_transform  s     6r  c              
   C   s   t j| jd |jd ft jd}t| jd D ]}t| | j}|dkrTt j||< q.t | | }| | jD ]t}| ||f dkr||ddf ||ddf<  q.t|jd D ]0}|||f  | ||f | |||f  7  < qqlq.|S )aX  Given a bipartite graph representing the 1-simplices and strengths between the
     new points and the original data set along with an embedding of the original points
    initialize the positions of new points relative to the strengths (of their neighbors in the source data).

    If a point is in our original data set it embeds at the original points coordinates.
    If a point has no neighbours in our original dataset it embeds as the np.nan vector.
    Otherwise a point is the weighted average of it's neighbours embedding locations.

    Parameters
    ----------
    graph: csr_matrix (n_new_samples, n_samples)
        A matrix indicating the the 1-simplices and their associated strengths.  These strengths should
        be values between zero and one and not normalized.  One indicating that the new point was identical
        to one of our original points.

    embedding: array of shape (n_samples, dim)
        The original embedding of the source data.

    Returns
    -------
    new_embedding: array of shape (n_new_samples, dim)
        An initial embedding of the new sample points.
    r   r   rO   N)	r/   rT   rU   rV   rX   r3   r6   nansum)r   r   re   Z	row_indexZnum_neighboursZrow_sumZ	col_indexrh   r+   r+   r,   init_graph_transform	  s      
r  c              
   C   s   t ||jd D ]}d}t |jd D ]T}t | jd D ]@}|||f |k r8|d7 }| ||f  | |||f |f 7  < q8q&t | jd D ]}| ||f  |  < qqd S r.   )rX   rU   )Zcurrent_initZn_original_samplesr6   r)   rg   r*   rh   r+   r+   r,   init_update5  s    (r  c                 C   sv   dd }t d| d d}t |j}d|||k < t |||k |  |  |||k< t|||\}}|d |d fS )zFit a, b params for the differentiable curve used in lower
    dimensional fuzzy simplicial complex construction. We want the
    smooth curve (from a pre-defined family with simple gradient) that
    best matches an offset exponential decay.
    c                 S   s   dd|| d|     S )NrN   r   r+   )xr   r   r+   r+   r,   curveK  s    zfind_ab_params.<locals>.curver   r   i,  rN   r   )r/   ZlinspacerT   rU   r]   r   )spreadmin_distr	  ZxvZyvparamsZcovarr+   r+   r,   find_ab_paramsD  s    $r  c                '   @   s   e Zd ZdZd1ddZdd Zd2ddZdd Zdd Zdd  Z	d!d" Z
d3d#d$Zd%d& Zd4d'd(Zd)d* Zd+d, Zd-d. Zd/d0 ZdS )5UMAPaF/  Uniform Manifold Approximation and Projection

    Finds a low dimensional embedding of the data that approximates
    an underlying manifold.

    Parameters
    ----------
    n_neighbors: float (optional, default 15)
        The size of local neighborhood (in terms of number of neighboring
        sample points) used for manifold approximation. Larger values
        result in more global views of the manifold, while smaller
        values result in more local data being preserved. In general
        values should be in the range 2 to 100.

    n_components: int (optional, default 2)
        The dimension of the space to embed into. This defaults to 2 to
        provide easy visualization, but can reasonably be set to any
        integer value in the range 2 to 100.

    metric: string or function (optional, default 'euclidean')
        The metric to use to compute distances in high dimensional space.
        If a string is passed it must match a valid predefined metric. If
        a general metric is required a function that takes two 1d arrays and
        returns a float can be provided. For performance purposes it is
        required that this be a numba jit'd function. Valid string metrics
        include:
            * euclidean
            * manhattan
            * chebyshev
            * minkowski
            * canberra
            * braycurtis
            * mahalanobis
            * wminkowski
            * seuclidean
            * cosine
            * correlation
            * haversine
            * hamming
            * jaccard
            * dice
            * russelrao
            * kulsinski
            * ll_dirichlet
            * hellinger
            * rogerstanimoto
            * sokalmichener
            * sokalsneath
            * yule
        Metrics that take arguments (such as minkowski, mahalanobis etc.)
        can have arguments passed via the metric_kwds dictionary. At this
        time care must be taken and dictionary elements must be ordered
        appropriately; this will hopefully be fixed in the future.

    n_epochs: int (optional, default None)
        The number of training epochs to be used in optimizing the
        low dimensional embedding. Larger values result in more accurate
        embeddings. If None is specified a value will be selected based on
        the size of the input dataset (200 for large datasets, 500 for small).

    learning_rate: float (optional, default 1.0)
        The initial learning rate for the embedding optimization.

    init: string (optional, default 'spectral')
        How to initialize the low dimensional embedding. Options are:
            * 'spectral': use a spectral embedding of the fuzzy 1-skeleton
            * 'random': assign initial embedding positions at random.
            * A numpy array of initial embedding positions.

    min_dist: float (optional, default 0.1)
        The effective minimum distance between embedded points. Smaller values
        will result in a more clustered/clumped embedding where nearby points
        on the manifold are drawn closer together, while larger values will
        result on a more even dispersal of points. The value should be set
        relative to the ``spread`` value, which determines the scale at which
        embedded points will be spread out.

    spread: float (optional, default 1.0)
        The effective scale of embedded points. In combination with ``min_dist``
        this determines how clustered/clumped the embedded points are.

    low_memory: bool (optional, default True)
        For some datasets the nearest neighbor computation can consume a lot of
        memory. If you find that UMAP is failing due to memory constraints
        consider setting this option to True. This approach is more
        computationally expensive, but avoids excessive memory use.

    set_op_mix_ratio: float (optional, default 1.0)
        Interpolate between (fuzzy) union and intersection as the set operation
        used to combine local fuzzy simplicial sets to obtain a global fuzzy
        simplicial sets. Both fuzzy set operations use the product t-norm.
        The value of this parameter should be between 0.0 and 1.0; a value of
        1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy
        intersection.

    local_connectivity: int (optional, default 1)
        The local connectivity required -- i.e. the number of nearest
        neighbors that should be assumed to be connected at a local level.
        The higher this value the more connected the manifold becomes
        locally. In practice this should be not more than the local intrinsic
        dimension of the manifold.

    repulsion_strength: float (optional, default 1.0)
        Weighting applied to negative samples in low dimensional embedding
        optimization. Values higher than one will result in greater weight
        being given to negative samples.

    negative_sample_rate: int (optional, default 5)
        The number of negative samples to select per positive sample
        in the optimization process. Increasing this value will result
        in greater repulsive force being applied, greater optimization
        cost, but slightly more accuracy.

    transform_queue_size: float (optional, default 4.0)
        For transform operations (embedding new points using a trained model_
        this will control how aggressively to search for nearest neighbors.
        Larger values will result in slower performance but more accurate
        nearest neighbor evaluation.

    a: float (optional, default None)
        More specific parameters controlling the embedding. If None these
        values are set automatically as determined by ``min_dist`` and
        ``spread``.
    b: float (optional, default None)
        More specific parameters controlling the embedding. If None these
        values are set automatically as determined by ``min_dist`` and
        ``spread``.

    random_state: int, RandomState instance or None, optional (default: None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    metric_kwds: dict (optional, default None)
        Arguments to pass on to the metric, such as the ``p`` value for
        Minkowski distance. If None then no arguments are passed on.

    angular_rp_forest: bool (optional, default False)
        Whether to use an angular random projection forest to initialise
        the approximate nearest neighbor search. This can be faster, but is
        mostly on useful for metric that use an angular style distance such
        as cosine, correlation etc. In the case of those metrics angular forests
        will be chosen automatically.

    target_n_neighbors: int (optional, default -1)
        The number of nearest neighbors to use to construct the target simplcial
        set. If set to -1 use the ``n_neighbors`` value.

    target_metric: string or callable (optional, default 'categorical')
        The metric used to measure distance for a target array is using supervised
        dimension reduction. By default this is 'categorical' which will measure
        distance in terms of whether categories match or are different. Furthermore,
        if semi-supervised is required target values of -1 will be trated as
        unlabelled under the 'categorical' metric. If the target array takes
        continuous values (e.g. for a regression problem) then metric of 'l1'
        or 'l2' is probably more appropriate.

    target_metric_kwds: dict (optional, default None)
        Keyword argument to pass to the target metric when performing
        supervised dimension reduction. If None then no arguments are passed on.

    target_weight: float (optional, default 0.5)
        weighting factor between data topology and target topology. A value of
        0.0 weights predominantly on data, a value of 1.0 places a strong emphasis on
        target. The default of 0.5 balances the weighting equally between data and
        target.

    transform_seed: int (optional, default 42)
        Random seed used for the stochastic aspects of the transform operation.
        This ensures consistency in transform operations.

    verbose: bool (optional, default False)
        Controls verbosity of logging.

    tqdm_kwds: dict (optional, defaul None)
        Key word arguments to be used by the tqdm progress bar.

    unique: bool (optional, default False)
        Controls if the rows of your data should be uniqued before being
        embedded.  If you have more duplicates than you have n_neighbour
        you can have the identical data points lying in different regions of
        your space.  It also violates the definition of a metric.
        For to map from internal structures back to your data use the variable
        _unique_inverse_.

    densmap: bool (optional, default False)
        Specifies whether the density-augmented objective of densMAP
        should be used for optimization. Turning on this option generates
        an embedding where the local densities are encouraged to be correlated
        with those in the original space. Parameters below with the prefix 'dens'
        further control the behavior of this extension.

    dens_lambda: float (optional, default 2.0)
        Controls the regularization weight of the density correlation term
        in densMAP. Higher values prioritize density preservation over the
        UMAP objective, and vice versa for values closer to zero. Setting this
        parameter to zero is equivalent to running the original UMAP algorithm.

    dens_frac: float (optional, default 0.3)
        Controls the fraction of epochs (between 0 and 1) where the
        density-augmented objective is used in densMAP. The first
        (1 - dens_frac) fraction of epochs optimize the original UMAP objective
        before introducing the density correlation term.

    dens_var_shift: float (optional, default 0.1)
        A small constant added to the variance of local radii in the
        embedding when calculating the density correlation objective to
        prevent numerical instability from dividing by a small number

    output_dens: float (optional, default False)
        Determines whether the local radii of the final embedding (an inverse
        measure of local density) are computed and returned in addition to
        the embedding. If set to True, local radii of the original data
        are also included in the output for comparison; the output is a tuple
        (embedding, original local radii, embedding local radii). This option
        can also be used when densmap=False to calculate the densities for
        UMAP embeddings.

    disconnection_distance: float (optional, default np.inf or maximal value for bounded distances)
        Disconnect any vertices of distance greater than or equal to disconnection_distance when approximating the
        manifold via our k-nn graph. This is particularly useful in the case that you have a bounded metric.  The
        UMAP assumption that we have a connected manifold can be problematic when you have points that are maximally
        different from all the rest of your data.  The connected manifold assumption will make such points have perfect
        similarity to a random set of other points.  Too many such points will artificially connect your space.

    precomputed_knn: tuple (optional, default (None,None,None))
        If the k-nearest neighbors of each point has already been calculated you
        can pass them in here to save computation time. The number of nearest
        neighbors in the precomputed_knn must be greater or equal to the
        n_neighbors parameter. This should be a tuple containing the output
        of the nearest_neighbors() function or attributes from a previously fit
        UMAP object; (knn_indices, knn_dists,knn_search_index).
    r   r   r   NrN   r   r@   Trj   rl         @Fcategoricalrm   *   r   rR   333333?NNNc(           (      C   s   || _ || _|| _|| _|| _|| _|| _|	| _|| _|| _	|| _
|| _|
| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _| | _|!| _|"| _|#| _|$| _ |%| _!|&| _"|'| _#|| _$|| _%|| _&d S r#   )'ro   rp   r   target_metricrq   r   r   r   r   repulsion_strengthlearning_rater
  r  ru   r   rb   r   rr   angular_rp_foresttransform_queue_sizetarget_n_neighborstarget_metric_kwdstarget_weighttransform_seedtransform_modeforce_approximation_algorithmrE   r   r   r   dens_lambda	dens_fracdens_var_shiftr   rD   precomputed_knnrv   r   r   )(selfro   r   rp   rq   r   r   r   r  r   r  r
  ru   rv   r   rb   r  r   r  r   r   rr   r  r  r  r  r  r  r  r  rE   r   r   r   r  r   r!  r   rD   r"  r+   r+   r,   __init__B  sP    *zUMAP.__init__c                    s  | j dk s| j dkrtd| jdk r.td| j| jkrBtd| jdk rTtdt| jtsvt| jtj	svtdt| jtr| jdkrtd	t| jtj	r| jj
d
 | jkrtdt| jtst| jstd| jdk rtd| jdk rtd| jdk rtd| jdk r2| jdkr2tdt| jtst| jtrVtd| jd
 dkrntdzt| j| _W n tk
r   tdY nX | jd
k rtd| jd k	r| jdk st| jtstd| jd kri | _n| j| _| jd kri | _n| j| _| jd kr,i | _n| j| _tj| jrLd| _nd| _t| jr| | j| j| j}|r| j t j!dd fdd}|| _"| j| _#n| j| _"d | _#t$d n`| jdkr| j%rtdt$d  | j| _"d | _#n*| jd!kr| j& dk rtd"n| jt'j(kr| jrd| jtj)krRtj)| j | _"ntd#*| jnt'j(| j | _"zt'j+| j | _#W n, t,k
r   t$d$*| j d | _#Y nX nl| jt-kr| jr| jt.krt.| j | _"ntd#*| jnt-| j | _"t$d$*| j d | _#ntd%t| j/rV| | j/| j}|rL| j/| _0ntd&n\| j/dkrltd'nF| j/t'j+krt'j+| j/ | _0n(| j/t'j(krtd(*| j/ntd)| jd*krd| _1| j2dk s| j2dkrtd+| j3dk rtd,| j4dk s| j4dkrtd-| j5dk r,td.| j6r:| j3nd| j6rJ| j4nd| j5| jd/| _7| j6rx| j/d0krxtd1| j8d krt9:| jtj;| _<n.t| j8tst| j8t=r| j8| _<ntd2| j>d kri | _>nt| j>t?dkrtd3d4| j>kr
d5| j>d4< d6| j>kr$d7}|| j>d6< t@| d8r~| jAd k	r~| j%rLtd9t| jBtj	sdtd:t| jAtj	s|td;| jAj
| jBj
krtd<t| jCtDstd=| jAj
d
 | jk rt$d> d | _Bd | _Ad | _Cn| jAj
d | jj
d krt$d? d | _Bd | _Ad | _Cnl| jAj
d d@k r6| jEs6t$dA nH| jAj
d
 | jkr~| jBd d d | jf | _B| jAd d d | jf | _Ad S )BNrQ   rN   z,set_op_mix_ratio must be between 0.0 and 1.0z%repulsion_strength cannot be negativez-min_dist must be less than or equal to spreadzmin_dist cannot be negativez init must be a string or ndarray)r   r   z1string init values must be "spectral" or "random"r   z*init ndarray must match n_components valuez!metric must be string or callabler   z%negative sample rate must be positivezlearning_rate must be positiver   z"n_neighbors must be greater than 1rj   z)target_n_neighbors must be greater than 1zn_components must be an intz#n_components must be a whole numberz#n_components must be greater than 0z&n_epochs must be a nonnegative integerTFrL   c                    s    | |f| d S Nr   r+   r  ykwds_mr+   r,   
_dist_only  s    z-UMAP._validate_parameters.<locals>._dist_onlyzcustom distance metric does not return gradient; inverse_transform will be unavailable. To enable using inverse_transform method, define a distance function that returns a tuple of (distance [float], gradient [np.array])rk   z0unique is poorly defined on a precomputed metricz?using precomputed metric; inverse_transform will be unavailabler    z3Metric 'hellinger' does not support negative valuesz*Metric {} is not supported for sparse datazfgradient function is not yet implemented for {} distance metric; inverse_transform will be unavailablez2metric is neither callable nor a recognised stringzScustom output_metric must return a tuple of (distance [float], gradient [np.array])z&output_metric cannnot be 'precomputed'z0gradient function is not yet implemented for {}.z9output_metric is neither callable nor a recognised string)r   r   r"   r!   Zll_dirichletr    z7n_jobs must be a postive integer, or -1 (for all cores)zdens_lambda cannot be negativez%dens_frac must be between 0.0 and 1.0z!dens_var_shift cannot be negativelambdafracZ	var_shiftro   r   l2z6Non-Euclidean output metric not supported for densMAP.z8disconnection_distance must either be None or a numeric.ztqdm_kwds must be a dictionary. Please provide valid tqdm parameters as key value pairs. Valid tqdm parameters can be found here: https://github.com/tqdm/tqdm#parametersdesczEpochs completedZ
bar_formatzA{desc}: {percentage:3.0f}%| {bar} {n_fmt}/{total_fmt} [{elapsed}]r   z6unique is not currently available for precomputed_knn.z*precomputed_knn[0] must be ndarray object.z*precomputed_knn[1] must be ndarray object.zPprecomputed_knn[0] and precomputed_knn[1] must be numpy arrays of the same size.zBprecomputed_knn[2] (knn_search_index) must be an NNDescent object.zprecomputed_knn has a lower number of neighbors than n_neighbors parameter. precomputed_knn will be ignored and the k-nn will be computed normally.zprecomputed_knn has a different number of samples than the data you are fitting. precomputed_knn will be ignored andthe k-nn will be computed normally.   zprecomputed_knn is meant for large datasets. Since your data is small, precomputed_knn will be ignored and the k-nn will be computed normally.)Fr   r   r  r  r
  r$   r   r   r/   ndarrayrU   r   rp   callabler   _initial_alpharo   r  rZ   r   rq   _metric_kwdsr   _output_metric_kwdsr  _target_metric_kwdsr   r   Zisspmatrix_csr	_raw_data_sparse_data_check_custom_metricnumbanjit_input_distance_func_inverse_distance_funcr   r   rz   r   r   r   formatnamed_distances_with_gradientsKeyErrorpynn_named_distancespynn_sparse_named_distancesr   _output_distance_funcr  rv   r  r   r!  r   _densmap_kwdsrD   DISCONNECTION_DISTANCESgetr0   _disconnection_distancer   r   dicthasattrr   r   r   r   r  )r#  Zin_returns_gradr,  Zout_returns_gradZbar_fr+   r*  r,   _validate_parameters  s   




  





 

  


zUMAP._validate_parametersc                 C   s   |d k	r(|t jd|jd d \}}nt jjddd| jfd\}}tj|rl||j	|j
|j	|j
f|}n|||f|}t|dot|dkS )Nr   r   ir   r   __iter__)r/   r   r   rU   r   r   r   r   issparser6   r   rL  r3   )r#  rp   r)  r   r  r(  Z
metric_outr+   r+   r,   r<    s     zUMAP._check_custom_metricc                 G   s  t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _tdd | jD r~d | _t dd |D | _t d	d |D | _t d
d |D | _	t dd |D | _
t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t dd |D | _t d d |D | _t d!d |D | _ t d"d |D | _!t d#d |D | _"t d$d |D | _#t d%d |D | _$d S )&Nc                 S   s   g | ]
}|j qS r+   )ro   .0mr+   r+   r,   
<listcomp>  s     z2UMAP._populate_combined_params.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r+   )rp   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )rq   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s    g | ]}|j d k	r|j ndqS )Nrj   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]}|d kqS )rj   r+   )rQ  r  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r
  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )ru   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )rr   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r!  rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )r   rP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )_arP  r+   r+   r,   rS    s     c                 S   s   g | ]
}|j qS r+   )_brP  r+   r+   r,   rS    s     )%r-   ro   rp   rq   r   r   allr   r   r  r  r
  r  ru   r   rb   r   rr   r  r  r  r  r  r  r  r  rE   r   r   r  r   r!  r   r   r   rT  rU  )r#  modelsr+   r+   r,   _populate_combined_params  sR    zUMAP._populate_combined_paramsc                 C   s  t | dgdd t |dgdd | jjd |jjd kr@tdt }|| | t| j|jd|_t|jd|_tj	j
|jd dkrtd	 d
}nd}t|j|_t|j|_t|jt|jt|jt|jd|_|jd krd }nt|j}td |jt|jt|jt|jt|jt|jt|j ||t!ddi |j|j|jdt"t|j#| j$d\|_%}|jr|d |_&|d |_'|S )Ngraph_'Only fitted UMAP models can be combinedZ
attributesmsgr   7Only models with the equivalent samples can be combinedrm   Tr   zqCombined graph is not connected but multi-component layout is unsupported. Falling back to random initialization.r   r   r-  r  r   Fr   rE   r   r   r   (r   rY  rU   r   r  rX  r   r   r   r   csgraphconnected_componentsr   r/   anyr   r   r4   r  r   r!  ro   rG  r   r  rz   r   r  rW   rT  rU  r  r   r   boolrE   r   
embedding_	rad_orig_rad_emb_r#  otherre   r   r   r   r+   r+   r,   __mul__  sz          












zUMAP.__mul__c                 C   s  t | dgdd t |dgdd | jjd |jjd kr@tdt }|| | t| j|j|_t|jd|_tj	j
|jd dkrtd d	}nd
}t|j|_t|j|_t|jt|jt|jt|jd|_|jd krd }nt|j}td |jt|jt|jt|jt|jt|jt|j ||t!ddi |j|j|jdt"t|j#| j$d\|_%}|jr|d |_&|d |_'|S )NrY  rZ  r[  r   r]  Tr   pCombined graph is not connected but mult-component layout is unsupported. Falling back to random initialization.r   r   r-  r  r   Fr^  r   r   )(r   rY  rU   r   r  rX  r   r   r   r   r`  ra  r   r/   rb  r   r   r4   r  r   r!  ro   rG  r   r  rz   r   r  rW   rT  rU  r  r   r   rc  rE   r   rd  re  rf  rg  r+   r+   r,   __add__;  sr        












zUMAP.__add__c                 C   s  t | dgdd t |dgdd | jjd |jjd kr@tdt }|| | t| j|jddd|_t|jd	|_tj	j
|jd d
krtd d}nd}t|j|_t|j|_t|jt|jt|jt|jd|_|jd krd }nt|j}td |jt|jt|jt|jt|jt|jt|j ||t!ddi |j|j|jd	t"t|j#| j$d\|_%}|jr|d |_&|d |_'|S )NrY  rZ  r[  r   r]  rm   T)r   r   Fr   rj  r   r   r-  r  r   r^  r   r   r_  rg  r+   r+   r,   __sub__  s|           












zUMAP.__sub__c                  C   s(
  t |tjddd}|| _| jdks,| jdkrDt| j| j\| _	| _
n| j| _	| j| _
t| jtjrvt | jtjdd}n| j}| j| _| jd | _| jd | _| jd	 | _|   | jrtt|  t | _| jdkr| jdk	rt| j | jr|| jrt|\}}}n"tj|d
d
d
dddd \}}}| jrttd|j d d|| j d  t!|}td|| d||  || _"n$t#t$|j d }t#t$|j d }|| j d | j%kr|| j d dkrt&d| j'f| _(| S t)d || j d d | _*| j+r| j*| j,d< n| j%| _*| jr2|j-s2|.  t/| j0}| jrPtt1 d | j2dkr"| jr"t3|4 t5|4 krt6dt7|8 dkst6d| jdkrVtj&|j d | j%ftj9d| _:tj&| j:j tj;d| _<t$|j d D ]b}	||	 j=}
||	 j>}t?|
| j*k r t6dt@|
d| j* }|| | j:|	< |
| | j<|	< qn| j| _:| j| _<| j<| jAk}d| j:|< tjB| j<|< |C }tD|| | j%|d| jE| j:| j<| jF| jG| jHd
| j| j+p| jI\| _J| _K| _L| _MtCtN| jJjCddO dk}tP||| jA| jj d | jd n|| j d dk r| jQsd
| _Rz2| jrT| j2n| jS}tT|| fd|i| jE}W n t6tUfk
r } zr| jrtV| j2stWjX| j2 }tWjY|| Z || jEd}ntWjY|| | jS| jEd}ntWjY|| | jS| jEd}W 5 d}~X Y nX tC|| jAk}tjB||| jAk< tD|| j*|d| jEdd| jF| jG| jHd
| j| j+p`| jI\| _J| _K| _L| _MtCtN| jJjCddO dk}tP||| jA| jj d | jd nZd| _R| jr| j2t[kr| j2}n"| js| j2t\kr| j2}n| jS}| jdkrBt]|| | j*|| jE| jF|| j^d
| j| jd
\| _:| _<| __n| j| _:| j| _<| j| __| j<| jAk}d| j:|< tjB| j<|< |C }tD|| | j%||| jE| j:| j<| jF| jG| jHd
| j| j+p| jI\| _J| _K| _L| _MtCtN| jJjCddO dk}tP||| jA| jj d | jd |dk		r$| js,t?|n|j d }|t?|krZt6d j`|t?|d!| jad"krp|| }nt |dd#| }| jad$kr| jbd%k rd&d%d%| jb   }nd'}tc| jJ||d(| _JnV| jatWjdkr | jbd%k rd&d%d%| jb   }nd'}tWe|| ja}tc| jJ|| ja||d)| _Jnt?|j dkr<|fdd}| jgdkrP| j*}n| jg}|j d dk rztT|fd| jai| jh}W n. tUt6fk
r   tWjY|| ja| jhd}Y nX tD|||d| jhdddd%d%d\}}}n&tD|||| ja| jhdddd%d%d\}}}ti| jJ|| jb| _Jtj| jJ| _Jd
| _knd| _k| j+	s:| jI	rF| jM| j,d*< | j	rZtt1 d+ | jld,k	r| m| j| | jn||\| _(}tN| jJjCddO dk}t?|dk	rto| j'tjp| j(|< | j(| | _(| jI	r|d- | | _q|d. | | _r| j
r
tt1 d/  t| j tst| j| _u| S )0a  Fit X into an embedded space.

        Optionally use y for supervised dimension reduction.

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row. If the method
            is 'exact', X may be a sparse matrix of type 'csr', 'csc'
            or 'coo'.

        y : array, shape (n_samples)
            A target array for supervised dimension reduction. How this is
            handled is determined by parameters UMAP was instantiated with.
            The relevant attributes are ``target_metric`` and
            ``target_metric_kwds``.
        csrr   rP   accept_sparser   NF)rP   ro  r   r   r   T)Zreturn_indexZreturn_inverseZreturn_countsr      z2Unique=True -> Number of data points reduced from z to zMost common duplicate isz with a count of zIn_neighbors is larger than the dataset size; truncating to X.shape[0] - 1ro   zConstruct fuzzy simplicial setrk   z;Sparse precomputed distance matrices should be symmetrical!z.Non-zero distances from samples to themselves!rO   z3Some rows contain fewer than n_neighbors distances!rj   r   r   r3  rp   rp   r)  r   rv   rE   zELength of x = {len_x}, length of y = {len_y}, while it must be equal.)Zlen_xZlen_ystring)Z	ensure_2dr  rN   g      @g   mB)r   )rp   r   r   r   zConstruct embeddingr   r   r   z Finished embedding)vr   r/   rV   r:  r   r   r  r
  r  rT  rU  r$   r   r4  r  r6  r"  r   r   r   rM  rE   rA   r   r=  Zget_num_threadsZ_original_n_threadsrv   Zset_num_threadsr   r;  r   rU   ZargmaxZ_unique_inverse_r%   rX   ro   rT   r   rd  r   _n_neighborsr   rG  Zhas_sorted_indicesZsort_indicesr   rr   r   rp   sparse_trilZgetnnzsparse_triur   rV  ZdiagonalrZ   _knn_indicesr   
_knn_distsr   r6   r3   argsortrJ  r0   r  r   r7  r  r   rb   r   rY  _sigmas_rhosZgraph_dists_r7   flattenrF   r  _small_datar?  r	   	TypeErrorr5  r   r   pairwise_special_metrictoarrayrE  rD  r   ru   _knn_search_indexrA  r  r  r   ZDISCRETE_METRICSZget_discrete_paramsreshaper  r9  r   r   _supervisedr  _fit_embed_datar   fullr  re  rf  r   hash_input_hash) r#  r}   r(  r   rf   Zinversecountsmost_commonrr   Zrow_idrow_dataZrow_indicesZrow_nn_data_indicesr   rB   rC   r+  r   e	nn_metricZlen_XZy_r   r   r   r  ZydmatZtarget_graphZtarget_sigmasZtarget_rhosr   Zdisconnected_verticesr+   r+   r,   fit  s   
 

 







	



 
  	


  

 
zUMAP.fitc                 C   s`   t || j| j| j| j| j| j| j|||| j| j	| j
| j| j| j| j| jdk| jdk| j| jdS )zbA method wrapper for simplicial_set_embedding that can be
        replaced by subclasses.
        r0  Nr   )r  rY  r   r6  rT  rU  r  r   r?  r7  r   rG  r   rF  r8  r   rr   rE   r   )r#  r}   r   r   rr   r+   r+   r,   r  
  s0    zUMAP._fit_embed_datac                 C   sX   |  || | jdkr4| jr,| j| j| jfS | jS n | jdkrD| jS td| jdS )ae  Fit X into an embedded space and return that transformed
        output.

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row.

        y : array, shape (n_samples)
            A target array for supervised dimension reduction. How this is
            handled is determined by parameters UMAP was instantiated with.
            The relevant attributes are ``target_metric`` and
            ``target_metric_kwds``.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Embedding of the training data in low-dimensional space.

        or a tuple (X_new, r_orig, r_emb) if ``output_dens`` flag is set,
        which additionally includes:

        r_orig: array, shape (n_samples)
            Local radii of data points in the original data space (log-transformed).

        r_emb: array, shape (n_samples)
            Local radii of data points in the embedding (log-transformed).
        r   r   GUnrecognized transform mode {}; should be one of 'embedding' or 'graph'N)	r  r  r   rd  re  rf  rY  r   rA  )r#  r}   r(  r+   r+   r,   fit_transform
  s    

zUMAP.fit_transformc                 C   s(  | j jd dkrtdt|tjddd}t|}|| jkrn| j	dkrN| j
S | j	dkr^| jS td	| j	| jr|td
t| j}|ttdtj}| jdkrtd |jd | j jd ksttj|rtj|jd | jftjdd}tj|tjdd}t |jd D ]p}t!|| j"}t#|| jk rLtd| j$ d|| j%|d| j  ||< || j"|d| j  ||< qn8tj!|ddddd| jf tj}tj&||dd}t'|dkrt'|dks2tnH| j(rz2| j)r| jn| j*}	t+|| j fd|	i| j,}
W n t-tfk
r   | j)rt.| jszt/j0| j }	t/j1|2 | j 2 |	| j,d}
nt/j1|| j | j*| j,d}
nt/j1|| j | j*| j,d}
Y nX t3|
| jddd| jf }t4|
|| j}t!|}t4||| j}t4||| j}n*| j5j6rdnd}| j5j7|| j$|d\}}|jtjdd}d||| j8k< t9d| j:d }t;|t<| jt<|d\}}t=||||dd\}}}}tjj>|||ff|jd | j jd fd}| j	dkr|S |? }|@  tA|| j
}| jBdkr|jd d krd!}nd"}ntC| jBd# }d|j"|j"|j"9 t<| k < |@  tD|j"|}|jE}|jF}|j"}| jGd$krtH|| j
jtjdd%||||jd || jI| jJ|| jK| jLd& | jM| jNdk| jO| jPd'}n\tQ|| j
jtjdd%||||jd || jI| jJ|| jK| jLd& | jM| jRtS| jTU | jO| jPd'}|S )(ay  Transform X into the existing embedded space and return that
        transformed output.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            New data to be transformed.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Embedding of the new data in low-dimensional space.
        r   r   zHTransform unavailable when model was fit with only a single data sample.rm  r   rn  r   r   r  zGTransforming data into an existing embedding not supported for densMAP.r   rk   a?  Transforming new data with precomputed metric. We are assuming the input data is a matrix of distances from the new points to the points in the training set. If the input matrix is sparse, it should contain distances from the new points to their nearest neighbours or approximate nearest neighbours in the training set.rj   )rP   Z
fill_valuezNeed at least n_neighbors (z) distances for each row!Nr   rQ   rp   rq  gQ?gQ?)r   r   rN   r   T)r   r   r   d            @r   )ry   r  rE   r   )Vr:  rU   r   r   r/   rV   r   r  r  r  rd  rY  rA  r   NotImplementedErrorr   r  r   r   r   r   r   rp   r   AssertionErrorr   r   rO  r  rt  r   Z	full_likerX   ry  r   r3   ro   r6   Ztake_along_axisrz   r}  r;  r?  r	   r7  r~  r5  r   r   r  r  Zargpartitionr   r  Z_angular_treesr   rJ  r4   rb   ri   r   r   r   r   r   r  r   rZ   r   r   r   r   r   rT  rU  r  r6  r   rr   rE   r   r   rF  r&   r8  r5   )r#  r}   Zx_hashrr   r   r6   r   r)   Zdata_indicesr+  r   Zdmat_shortenedZindices_sortedr   Zadjusted_local_connectivityr   r   r   r   r   r   	csr_graphr   r   r   r   r   r   r+   r+   r,   	transform
  sF   




  "(( 
 
  

    
 zUMAP.transformc                    s  j rtdnJjdkr$tdn6jr4tdn&jdkrHtd njdkrZtdt tj	d	d
 t
j}|ttdtj}tjjjddd}|j|  }tjjjjd jjd ftdtd|jjd D ]\}|j| D ]L}|jjd k r|j| |j| jjd k  d|f< d|f< qqtjtjjd jjd 		fdd|D 
tj rj t!j"ddfdd}|n0j t#j$% krt#j$j  ntd&j t'j()  
fddt* jd D t+	fddD t+fddt*t,D t+
fddt*t,
D t+	fddt*jd D j-\}	}
ddj.dj/     }tjj0||	|
ff jd jjd fd}t1|2 d d!}|j34 jd 	}|j54 jd 	}t6||j}j7dkrN|jd d"krHd#}nd$}ntj7d% }t8|j5|}|j9}|j:}|j5}t;|j|||j<j=||jd |j.j/|j>j?d& j@jt'jA) jBjCd'}|S )(a  Transform X in the existing embedded space back into the input
        data space and return that transformed output.

        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            New points to be inverse transformed.

        Returns
        -------
        X_new : array, shape (n_samples, n_features)
            Generated data points new data in data space.
        z1Inverse transform not available for sparse input.Nz1Inverse transform not available for given metric.z,Inverse transform not available for densMAP.   zInverse transform works best with low dimensional embeddings. Results may be poor, or this approach to inverse transform may fail altogether! If you need a high dimensional latent space and inverse transform operations consider using an autoencoder.r   z<Inverse transform not available for transform_mode = 'graph'r   )rP   r   r   TZQJ)incrementalZqhull_optionsr   rO   r   rj   c                    s   g | ]}t  |d  dqS )r   r:   )r?   )rQ  v)r8   r:   r+   r,   rS    s   z*UMAP.inverse_transform.<locals>.<listcomp>r%  c                    s    | |f| d S r&  r+   r'  )_out_mr+   r,   _output_dist_only  s    z1UMAP.inverse_transform.<locals>._output_dist_onlyzUnrecognized output metric: {}c              	      s0   g | ]( t  fd d  D qS )c                    s&   g | ]}  j | f qS r+   )rd  )rQ  nb)r}   	dist_argsdist_only_funcr)   r#  r+   r,   rS    s   z5UMAP.inverse_transform.<locals>.<listcomp>.<listcomp>)r/   r7   )rQ  )r}   r  r  neighborhoodr#  )r)   r,   rS    s   c                    s   g | ]}t |d   qS r#   )r/   ry  )rQ  r  r  r+   r,   rS    s     c                    s   g | ]} | |  qS r+   r+   rQ  r)   )r`   idxr+   r,   rS    s     c                    s   g | ]}|  |  qS r+   r+   r  )r  r  r+   r,   rS    s     c                    s4   g | ],}t D ]}|||f  ||f gqqS r+   )rX   )rQ  r)   r*   )dists_output_spacer6   r:   r+   r,   rS    s   
 r   r   l1r   r   r  r  r  r  r  )Dr;  r   r@  r   r   r   r  r   r/   rV   r   r  r   r   r   r   r   r   ZspatialZDelaunayrd  Z	simplicesZfind_simplexr   Z
lil_matrixrU   rZ   rx   Z
csr_matrixrz   r:  r5  r   r=  r>  r   r   keysrA  r&   r8  r5   rX   r7   r3   TrT  rU  r   r
   r   r6   r  r   r  r   r   r   r   r   rz  r{  r  r6  r   r7  rE   r   )r#  r}   rr   r   Zdeltrir>   r)   r*   r  r   r   r   r   r  ZindsZinv_transformed_pointsr   r   r   r   r   r+   )r}   r  r8   r  r  r`   r  r  r6   r:   r  r#  r,   inverse_transform  s    






   

	"	
 zUMAP.inverse_transformc                 C   s  t |tjddd}t| j}|ttdtj	}| j
jd }| jdkrPtd| jr^td| jr2| jrtj| j
|g| _
nt| j
|g| _
| j
jd d	k rz.| jr| jn| j}t| j
fd
|i| j}W n ttfk
rd } zl| jr>t| js&tj| j }tj| j
 || jd}ntj| j
| j| jd}ntj| j
| j| jd}W 5 d }~X Y nX t|| j|d| jd d | j| j | j!d| j"\| _#| _$| _%t&|d d d | j'f }nd| _| jr| jt(kr| j}	n"| js| jt)kr| j}	n| j}	t*| j
| j|	| j| j|| j+d| j,| j"d
\| _-| _.| _/t| j
| j'||	| j| j-| j.| j| j | j!d| j"\| _#| _$| _%| j-}tj0| j
jd | j1ftjd}
| j2|
d |< t3|
|| | j4d krd}n| j4}t5| j
| j#| j1| j6| j7| j8| j9| j:||
|| j| j| j;| j<| j=| j>| j?| j@dk| jAd k| j"| jBd\| _2}nd| j/C  | j/D| | j/j
| _
| j/jE\| _-| _.| jr~| jt(kr~| j}	n"| js| jt)kr| j}	n| j}	t| j
| j'||	| j| j-| j.| j| j | j!d| j"\| _#| _$| _%tj0| j
jd | j1ftjd}
| j2|
d |< t3|
|| j- | j4d kr(d}n| j4}t5| j
| j#| j1| j6| j7| j8| j9| j:||
|| j| j| j;| j<| j=| j>| j?| j@dk| jAd k| j"| jBd\| _2}| j=r|d | _F|d | _Gd S )Nrm  r   rn  r   r   rk   z5Update does not currently support precomputed metricsz5Updating supervised models is not currently supportedr3  rp   rq  TFrr  rO   r0  r  r   r   )Hr   r/   rV   r   r  r   r   r   r   r   r:  rU   rp   r   r  r}  r;  r   r   Zvstackr?  r	   r7  r~  r5  r   r   r  r  r   rt  r  r   rb   rE   rY  rz  r{  ry  ro   rE  rD  r   ru   rv   rw  rx  r  rT   r   rd  r  r   r  r6  rT  rU  r  r   r   rG  r   rF  r8  r   rr   r   prepareupdater|   re  rf  )r#  r}   rr   r   Zoriginal_sizer+  r   r  r   r  r   r   r   r+   r+   r,   r  _  sx   


 

 
zUMAP.updatec                 C   sn   ddl m} dd l}|ddddd}d|_|| }|jdd||jd	}|jd
d||jd	}|dd|}|S )Nr   )_EstimatorPrettyPrinterTr   2   )compactindentZindent_at_nameZn_max_elements_to_showztqdm_kwds={.*}, )flagsz
 *

z, +z, )Zsklearn.utils._pprintr  r  Z_changed_onlypformatsubS)r#  r  r  pprepr_r+   r+   r,   __repr__8  s    
zUMAP.__repr__)'r   r   r   Nr   NNrN   r   r@   rN   Trj   rN   rN   rN   rl   r  NNNFrj   r  Nrm   r  r   FFNFFrR   r  r@   FNr  )N)N)N)__name__
__module____qualname____doc__r$  rM  r<  rX  ri  rk  rl  r  r  r  r  r  r  r  r+   r+   r+   r,   r  V  s~    n                                       
W  
1HFH
   S
- U ) Zr  )r@   F)rM   rN   rN   )TTrj   F)FF)rN   r   )r   r   )F)rm   F)cZ
__future__r   localewarningsr   timeZscipy.optimizer   Zsklearn.baser   Zsklearn.utilsr   r   Zsklearn.utils.validationr   Zsklearn.metricsr	   Zsklearn.preprocessingr
   Zsklearn.neighborsr   r   ImportErrorZsklearn.externalsZnumpyr/   Zscipy.sparser   r   ru  r   rv  Zscipy.sparse.csgraphr=  Zumap.distancesr`   r   Zumap.sparser   Z
umap.utilsr   r   r   r   Zumap.spectralr   Zumap.layoutsr   r   r   Zpynndescentr   Zpynndescent.distancesr   rD  Zpynndescent.sparser   rE  	setlocale
LC_NUMERICZiinfor   rz   r   r4   r   r\   r_   r0   rY   rH  r'   r-   r?   rF   r>  typesrV   ri   r   r   r   r   Zjitr   r   r   r   r   r   r   r   rB  r  r  r  r  r  r  r+   r+   r+   r,   <module>   s   		  
!	o    
_  V
 2*
*


"
O   
'
  O
 ,
