U
    md	~                  %   @   s,  d dl mZ d dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZ eejd eejjZeejjZejddd~d
dZejdddd Zejdddd Zejdddd Zejdejejjejjddddejjejjddddgejejdddd Zejejejejjddejejjddfejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjddd ejjejjejjejjejjddddd Z ejdddd Z!ejejej"ejjej"ejjfejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjdddd d! Z#ejejejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjejjd"ddd#d$ Z$e d%d& Z%ejdd'd(d) Z&ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjejjejj'ejjd+d,d-d. Z(e d/d0 Z)e d1d2 Z*e dd4d5Z+e d6d7 Z,e d8d9 Z-ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdd'd:d; Z.e d<d= Z/ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejj'ejj'd>d,d?d@ Z0ej1dd'dAdB Z2e dCdD Z3e dEdF Z4e dGdH Z5e dIdJ Z6e dKdL Z7e dMdN Z8e dOdP Z9e dQdR Z:ejdejjejjejjejj'ejjdSd,dTdU Z;ej1dddVdWdX Z<e dYdZ Z=ejdd[ejjid,d\d] Z>e d^d_ Z?e d`da Z@ejdejjejjejjejj'ejjdbd,dcdd ZAej1dddVdedf ZBe dgdh ZCdidj ZDe eCfdkdlZEe ddmdnZFe dodp ZGe dqdr ZHejddsdtddvdwZIejddsdtddxdyZJe&e&e(e)e)e)e*e*e*e*e+e-e.e,e/e4e3e5e6e7e8e9e:e?eEeEeFeFeFeFe@eGeGeHeHeHdz$ZKd{ZLe(ejMd|e(ejMd|e;e<d|e>e<d|eAeBd|e0e2d|d}ZNdS )    )print_functionN)normtau_rand)kantorovichjensen_shannon_divergencesymmetric_kl_divergenceCT)cacheh㈵>:0yE>c                 C   s$   t | | }|||t |  kS N)npabs)abZrtolatoldiff r   K/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/pynndescent/sparse.pyisclose   s    r   c                 C   s@   t | }t t jdt jd|dd  |d d kf}|| S )N   Zdtype)r   sortconcatenateonesZbool_)Zarrauxflagr   r   r   
arr_unique   s    
.r   c                 C   s:   | j d dkr|S |j d dkr$| S tt| |fS d S Nr   )shaper   r   r   )ar1ar2r   r   r   	arr_union&   s
    r#   c                 C   s:   t | |f}|  |d d |dd  |d d k S )Nr   r   )r   r   r   )r!   r"   r   r   r   r   arr_intersect2   s    r$   zi4(i4[:],i4[:])r   )readonly)i1i2)localsc           	      C   s   | j d dks|j d dkr dS d}d}| j d d }|j d d }| | }|| }d}||kr|d7 }||k r|d7 }| | }nq||k r|d7 }|| }qqqX||k r||k r|d7 }| | }qX||k r||k r|d7 }|| }qXqqX|S Nr   r   r    )	r!   r"   r&   r'   Zlimit1Zlimit2j1j2resultr   r   r   fast_intersection_size:   s6    



r.   )
result_indresult_datavalr&   r'   r+   r,   )fastmathr(   r	   c                 C   s  | j d |j d  }tj|tjd}tj|tjd}d}d}d}	|| j d k r(||j d k r(| | }
|| }|
|kr|| ||  }|dkr|
||	< |||	< |	d7 }	|d7 }|d7 }q@|
|k r|| }|dkr|
||	< |||	< |	d7 }	|d7 }q@|| }|dkr|||	< |||	< |	d7 }	|d7 }q@|| j d k rv| | }
|| }|dkrj|
||	< |||	< |	d7 }	|d7 }q(||j d k r|| }|| }|dkr|||	< |||	< |	d7 }	|d7 }qv|d |	 }|d |	 }||fS Nr   r   r   )r    r   zerosint32float32)ind1data1ind2data2Zresult_sizer/   r0   r&   r'   nnzr+   r,   r1   r   r   r   
sparse_sump   sb     





r<   c                 C   s   t | ||| S r   )r<   )r7   r8   r9   r:   r   r   r   sparse_diff   s    r=   )r1   r&   r'   r+   r,   c                 C   s   t jjt jj}t jjt jj}d}d}|| jd k r||jd k r| | }|| }	||	kr|| ||  }
|
dkr|| ||
 |d7 }|d7 }q,||	k r|d7 }q,|d7 }q,||fS r)   )	numbatypedListZ
empty_listtypesr5   r6   r    append)r7   r8   r9   r:   r/   r0   r&   r'   r+   r,   r1   r   r   r   
sparse_mul   s$    




rC   )r-   r1   r&   r'   r+   r,   c                 C   s   | j d }|j d }d}d}d}| | }	|| }
|	|
kr|| ||  }||7 }|d7 }||krd|S | | }	|d7 }||kr|S || }
q0|	|
k r|d7 }||kr|S | | }	q0|d7 }||kr|S || }
q0|S )Nr           r   r*   )r7   r8   r9   r:   Zdim1Zdim2r-   r&   r'   r+   r,   r1   r   r   r   sparse_dot_product  s8    




rE   c                 C   s  t | |}tj|jd tjd}tj|jd tjd}d}d}d}	|| jd k r*||jd k r*| | }
|| }|
|kr|| ||  }|dkr|| ||	< || ||	< |	d7 }	|d7 }|d7 }qB|
|k r|| }|dkr|| ||	< |	d7 }	|d7 }qB|| }|dkr || ||	< |	d7 }	|d7 }qB|| jd k rl|| }|dkr`|| ||	< |	d7 }	|d7 }q*||jd k r|| }|dkr|| ||	< |	d7 }	|d7 }ql|d |	 }|d |	 }||fS r3   )r#   r   r4   r    r6   )r7   r8   r9   r:   r/   Zresult_data1Zresult_data2r&   r'   r;   r+   r,   r1   r   r   r   dense_union>  sV    
 





rF   )r2   c                 C   sD   t | |||\}}d}t|jd D ]}||| d 7 }q$t|S )NrD   r      )r=   ranger    r   sqrtr7   r8   r9   r:   _aux_datar-   ir   r   r   sparse_euclideanx  s
    rN   z#f4(i4[::1],f4[::1],i4[::1],f4[::1]))rL   r-   r   dimrM   )r2   r(   c           	      C   sD   t | |||\}}d}t|}t|D ]}||| ||  7 }q&|S NrD   )r=   lenrH   )	r7   r8   r9   r:   rK   rL   r-   rO   rM   r   r   r   sparse_squared_euclidean  s    rR   c                 C   s@   t | |||\}}d}t|jd D ]}|t|| 7 }q$|S NrD   r   r=   rH   r    r   r   rJ   r   r   r   sparse_manhattan  s
    rU   c                 C   sB   t | |||\}}d}t|jd D ]}t|t|| }q$|S rS   )r=   rH   r    maxr   r   rJ   r   r   r   sparse_chebyshev  s
    rW          @c           	      C   sL   t | |||\}}d}t|jd D ]}|t|| | 7 }q$|d|  S )NrD   r         ?rT   )	r7   r8   r9   r:   prK   rL   r-   rM   r   r   r   sparse_minkowski  s
    r[   c                 C   s$   t | |||d jd }t|| S r   )r=   r    float)r7   r8   r9   r:   
n_featuresnum_not_equalr   r   r   sparse_hamming  s    r_   c                 C   s~   t |}t |}t| |||\}}d| t j}t| |||\}}	t |	}	t||	||\}
}d}|D ]}||7 }ql|S )NrY   rD   )r   r   r<   Zastyper6   r=   rC   )r7   r8   r9   r:   Z	abs_data1Z	abs_data2Z
denom_inds
denom_dataZ
numer_inds
numer_datarK   Zval_datar-   r1   r   r   r   sparse_canberra  s    



rb   c           	      C   sv   t | |||\}}t|}|jd dkr.dS t|}|dkrDdS t| |||\}}t|}t|}t|| S Nr   rD   )r<   r   r   r    sumr=   r\   )	r7   r8   r9   r:   rK   r`   denominatorra   	numeratorr   r   r   sparse_bray_curtis  s    



rg   c                 C   sB   t | |}| jd |jd  | }|dkr.dS t|| | S d S rc   r.   r    r\   r7   r8   r9   r:   	num_equalnum_non_zeror   r   r   sparse_jaccard  s
    
rl   )rk   rj   c                 C   sN   t | |}| jd |jd  | }|dkr.dS |dkr:tS t||  S d S rc   )r.   r    FLOAT32_MAXr   log2ri   r   r   r   sparse_alternative_jaccard  s    
ro   c                 C   s   dt d|   S )NrY   rX   )pow)vr   r   r   correct_alternative_jaccard  s    rr   c                 C   s6   t | |}| jd |jd  | }|| }t|| S r   rh   r7   r8   r9   r:   r]   num_true_truerk   r^   r   r   r   sparse_matching  s    
ru   c                 C   sJ   t | |}| jd |jd  | }|| }|dkr6dS |d| |  S d S )Nr   rD   rX   r.   r    r7   r8   r9   r:   rt   rk   r^   r   r   r   sparse_dice#  s    
rx   c                 C   sR   t | |}| jd |jd  | }|| }|dkr6dS t|| | ||  S d S rc   rh   rs   r   r   r   sparse_kulsinski/  s    
ry   c                 C   s:   t | |}| jd |jd  | }|| }d| ||  S Nr   rX   rv   rs   r   r   r   sparse_rogers_tanimoto=  s    
r{   c                 C   sl   | j d |j d kr&t| |kr&dS t| |}|t|dkkrX|t|dkkrXdS t|| | S d S rc   )r    r   allr.   rd   r\   )r7   r8   r9   r:   r]   rt   r   r   r   sparse_russellraoF  s    "
$r}   c                 C   s:   t | |}| jd |jd  | }|| }d| ||  S rz   rv   rs   r   r   r   sparse_sokal_michenerS  s    
r~   c                 C   sJ   t | |}| jd |jd  | }|| }|dkr6dS |d| |  S d S )Nr   rD   g      ?rv   rw   r   r   r   sparse_sokal_sneath\  s    
r   c           
      C   st   t | |||\}}d}t|}t|}|D ]}	||	7 }q*|dkrL|dkrLdS |dks\|dkr`dS d|||   S d S NrD   rY   )rC   r   )
r7   r8   r9   r:   rK   rL   r-   norm1norm2r1   r   r   r   sparse_cosineh  s    
r   )r-   norm_xnorm_yrO   rM   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|||
 7 }q6|dkr\|dkr\dS |dksl|dkrptS |dkr|tS || | }t|S d S rP   )rC   r   rQ   rH   rm   r   rn   )r7   r8   r9   r:   rK   rL   r-   r   r   rO   rM   r   r   r   sparse_alternative_cosinez  s    r   )r2   r	   c                 C   s2   t dt| dds| dk rdS dtd|   S d S NrD   gHz>)r   rY   rX   )r   r   rp   dr   r   r   !sparse_correct_alternative_cosine  s    r   c                 C   s   t | |||}d| S )NrY   )rE   r7   r8   r9   r:   r-   r   r   r   
sparse_dot  s    r   r-   c                 C   s*   t | |||}|dkrtS t| S d S rP   )rE   rm   r   rn   r   r   r   r   sparse_alternative_dot  s    r   c                 C   sT  d}d}d}| j d dkr,|j d dkr,dS | j d dksH|j d dkrLdS t|j d D ]}||| 7 }qZt|j d D ]}||| 7 }qz|| }|| }tj|j d tjd}	tj|j d tjd}
t|j d D ]}|| | |	|< qt|j d D ]}|| | |
|< qtt|	d || j d  |d   }tt|
d ||j d  |d   }t| |	||
\}}t|}|D ]}||7 }q~t| j d D ]$}| | |kr||	| | 8 }qt|j d D ]$}|| |kr||
| | 8 }qt	| |}||| ||j d   7 }|dkr2|dkr2dS |dkr@dS d|||   S d S )NrD   r   rY   r   rG   )
r    rH   r   emptyr6   rI   r   rC   setr#   )r7   r8   r9   r:   r]   Zmu_xZmu_yZdot_productrM   Zshifted_data1Zshifted_data2r   r   Zdot_prod_indsZdot_prod_dataZcommon_indicesr1   Zall_indicesr   r   r   sparse_correlation  sT      

r   c                 C   s   t | |||\}}d}t|}t|}t|| }	|D ]}
|t|
7 }q<|dkrd|dkrddS |dkst|dkrxdS ||	krdS td||	  S d S r   )rC   r   rd   rI   )r7   r8   r9   r:   aux_indsrL   r-   r   r   Zsqrt_norm_prodr1   r   r   r   sparse_hellinger  s    

r   )r-   	l1_norm_x	l1_norm_yrO   rM   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|t||
 7 }q:|dkrf|dkrfdS |dksv|dkrztS |dkrtS t|| | }t|S d S rS   )rC   r   rd   rQ   rH   rI   rm   rn   )r7   r8   r9   r:   r   rL   r-   r   r   rO   rM   r   r   r   sparse_alternative_hellinger	  s    

r   c                 C   s8   t dt| dds| dk rdS tdtd|   S d S r   )r   r   r   rI   rp   r   r   r   r   $sparse_correct_alternative_hellinger)  s    r   c                 C   s   t | |k S r   )r   r6   )xyr   r   r   dummy_ground_metric1  s    r   c                    s   t   fdd}|S )a  Generate a "ground_metric" suitable for passing to a ``sparse_kantorovich``
    distance function. This should be a metric that, given indices of the data,
    should produce the ground distance between the corresponding vectors. This
    allows the construction of a cost_matrix or ground_distance_matrix between
    sparse samples on the fly -- without having to compute an all pairs distance.
    This is particularly useful for things like word-mover-distance.

    For example, to create a suitable ground_metric for word-mover distance one
    would use:

    ``wmd_ground_metric = create_ground_metric(word_vectors, cosine)``

    Parameters
    ----------
    ground_vectors: array of shape (n_features, d)
        The set of vectors between which ground_distances are measured. That is,
        there should be a vector for each feature of the space one wishes to compute
        Kantorovich distance over.

    metric: callable (numba jitted)
        The underlying metric used to cpmpute distances between feature vectors.

    Returns
    -------
    ground_metric: callable (numba jitted)
        A ground metric suitable for passing to ``sparse_kantorovich``.
    c                    s    |   | S r   r   )Zindex1index2ground_vectorsmetricr   r   ground_metricS  s    z+create_ground_metric.<locals>.ground_metric)r>   njit)r   r   r   r   r   r   create_ground_metric6  s    r   c                 C   sh   t | jd |jd f}t| jd D ]2}t|jd D ]}|| | || |||f< q:q(t|||S r   )r   r   r    rH   r   )r7   r8   r9   r:   r   Zcost_matrixrM   jr   r   r   sparse_kantorovichZ  s
    r   c                 C   s  d}d}d}d}d}	d}
d}t |}t |}dd }|| jd k rF|	|jd k rF| | }||	 }||kr||||  7 }|
|| | 7 }
|||	 | 7 }||
| |}|}|d7 }|	d7 }	q8||k r
||||  7 }|
|| | 7 }
||
| |}|}|d7 }q8||||  7 }|||	 | 7 }||
| |}|}|	d7 }	q8|| jd k r| | }||||  7 }|
|| | 7 }
||
| |}|}|d7 }qF|	|jd k r||	 }||||  7 }|||	 | 7 }||
| |}|}|	d7 }	qt |d| S )NrD   r   c                 S   s   t t | |S r   )r   powerr   )r   rZ   r   r   r   <lambda>q      z'sparse_wasserstein_1d.<locals>.<lambda>r   rY   )r   rd   r    r   )r7   r8   r9   r:   rZ   r-   Zold_inddeltar&   r'   Zcdf1Zcdf2r   r   r   r+   r,   r   r   r   sparse_wasserstein_1de  s^    

 



r   c                 C   s   t | |||\}}t||S r   )rF   r   r7   r8   r9   r:   Zdense_data1Zdense_data2r   r   r    sparse_jensen_shannon_divergence  s    r   c                 C   s   t | |||\}}t||S r   )rF   r   r   r   r   r   sparse_symmetric_kl_divergence  s    r   F)parallelr	   rY   c              	   C   s  t | jd D ]}| |df g}	||df g}
td| jd D ] }| ||f dk r^ qfd}tt|	D ]}|	| }||| ||f  || ||f d   }||| ||f  || ||f d   }||| ||d   }||| ||d   }|||||}|
| tkrn||||f k rnt||k rnd} q<qn|rB|	| ||f  |
|||f  qBt| jd D ]P}|t|	k r|	| | ||f< |
| |||f< nd| ||f< tj	|||f< qtq| |fS )Nr   r   TFr   )
r>   pranger    rH   rQ   FLOAT32_EPSr   rB   r   inf)indicesZ	distancesdata_indicesdata_indptr	data_datadist	rng_stateprune_probabilityrM   Znew_indicesZnew_distancesr   r   kcZfrom_ind	from_dataZto_indto_datar   r   r   r   	diversify  sD      r   c	                 C   s  | j d d }	t|	D ]~}
|| |
 | |
d   }|| |
 | |
d   }t|}tj|j d tjd}td|j d D ]}|| }t|D ]}|| }|| dkr|| }|| }||| ||d   }||| ||d   }||| ||d   }||| ||d   }|||||}|| tkr||| k rt	||k rd||<  q~qq~t|j d D ],}|| }|| dkrjd|| |
 | < qjqd S )Nr   r   r   )
r    r>   r   r   Zargsortr   Zint8rH   r   r   )Zgraph_indptrZgraph_indicesZ
graph_datar   r   r   r   r   r   Zn_nodesrM   Zcurrent_indicesZcurrent_dataorderZretainedidxr   r   lrZ   qZ	from_indsr   Zto_indsr   r   r   r   r   diversify_csr  s6    
r   )$	euclideanl2ZsqeuclideanZ	manhattanl1ZtaxicabZ	chebyshevZlinfZlinftyZ	linfinityZ	minkowskiZcanberraZ
braycurtishammingjaccardZdicematching	kulsinskirogerstanimoto
russellraosokalmichenerZsokalsneathcosinecorrelationr   ZwassersteinZwasserstein_1dzwasserstein-1dkantorovich-1dr   	hellingerzjensen-shannonZjensen_shannonzsymmetric-klZsymmetric_klZsymmetric_kullback_liebler)r   r   r   r   r   r   r   )r   Z
correction)r   r   r   dotr   r   )r
   r   )rX   )r   )rY   )rY   )O
__future__r   localenumpyr   r>   Zpynndescent.utilsr   r   Zpynndescent.distancesr   r   r   	setlocale
LC_NUMERICZfinfor6   Zepsr   rV   rm   r   r   r   r#   r$   rA   r5   ZArrayZuint16r.   Tupler<   r=   ZListTyperC   rE   rF   rN   ZintprR   rU   rW   r[   r_   rb   rg   rl   ro   Z	vectorizerr   ru   rx   ry   r{   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsparse_named_distancesZsparse_need_n_featuresrI   Z!sparse_fast_distance_alternativesr   r   r   r   <module>   s.  







)	
?


	
'
9

	
	



	


	













 
	
<



$
?

	 6
 8,

