U
    md46                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dl	mZ d dlmZmZmZ dddd	gd	d
ddgd dddgddd	dggZe jddd d d d gdddd gddddgddddggfdd d d d gd d d d gddddgddddggfdd d d d gddddgddddgddddggfgdd Zdd Zdd Ze jddd d d d gd ddd gddddgddddggfdd d d d gd d d d gddddgddddggfdd d d d gd dddgddddgddddggfgdd Ze jddddgdd  Zd!d" Ze jd#edd$d%d& Zd'd( Ze jd)dd d d d ddgd d d d ddgd d ddd*d*gfdd d d d ddgd d ddddgd d ddd+d*gfdd d d dddgd d ddddgd ddd+d*d*gfgd,d- Ze jd.dd/d0d1dgdd2d3dgdd4d/dgdd4d/dggfdd5d6d7d8gd5d6d7d8gd9d:d;d<gd=d>d?d@ggfdd/d0d1dAgdd2d3dBgdd4d/dCgdd4d/dCggfge jdDdEdFdGgdHdI Ze jddddgdJdK ZdLdM Ze jdNdd dd+gfdd dd+gfgdOdP Z dQdR Z!e jdSej"ej#ej$ge jdTdej#ej$ge jdDdEdFdGgdUdV Z%e jdWej"ej#ej$ge jdDdEdFdGgdXdY Z&e j'dZe jd[dd\gd]d^ Z(d_d` Z)dadb Z*dcdd Z+e jdedFdfdg ed+D fdGdhdg ed+D fdEdidg ed+D fgdjdk Z,dS )l    N)clone)KBinsDiscretizer)OneHotEncoder)assert_array_almost_equalassert_array_equalassert_allclose_dense_sparse      ?      @g      g      @      ?   g      @   zstrategy, expecteduniformZkmeansquantilec                 C   s,   t dd| d}|t t||t d S )N   ordinaln_binsencodestrategy)r   fitXr   	transform)r   expectedest r   h/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s    	
r    c                   C   sR   t ddt t tdgd dt t ddtjjttksNt	d S )Nr   r   r   )
r   fit_transformr   nparrayr   n_bins_dtypeintAssertionErrorr   r   r   r   test_valid_n_bins    s    r)   c               	   C   s   t dd} t| d}d}tjt|d |t W 5 Q R X dddg} t| d}d}tjt|d |t W 5 Q R X ddddg} t| d}d}tjt|d |t W 5 Q R X d	dd	dg} t| d}d
}tjt|d |t W 5 Q R X d S )N)r             @r!   z:n_bins must be a scalar or array of shape \(n_features,\).matchr   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.g @z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r#   fullr   pytestraises
ValueErrorr"   r   )r   r   err_msgr   r   r   test_invalid_n_bins_array&   s,    




r3   c                 C   s~   t ddddgd| dt}t||t ttjd }|jj|fksNt	t
|j|jD ]\}}|j|d fks\t	q\d S )Nr   r   r   r   r   )r   r   r   r   r   r#   r$   shape
bin_edges_r(   zipr%   )r   r   r   Z
n_features	bin_edgesr   r   r   r   test_fit_transform_n_bins_arrayL   s    	
  r8   r   c              	   C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W 5 Q R X |j	d dkstt
||}t|d d df t|jd  d S )Nalwaysr   r   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r,   )warningssimplefilterr#   r$   r   r/   warnsUserWarningr   r%   r(   r   r   Zzerosr4   )r   r   r   warning_messageXtr   r   r   test_same_min_maxa   s    
"
r@   c               	   C   st   t d} tdd}tt ||  W 5 Q R X tdd}|| dd tt ||  W 5 Q R X d S )Nr*   r   r!   r   r   )	r#   aranger   r/   r0   r1   r   reshaper   )r   r   r   r   r   test_transform_1d_behavioro   s    


rC   i	   c                 C   sd   t dddddgdd}t dddddgdd}|d	|   }td
dd|}t|| d S )Nr+         @g      @g       @g      $@r   r   r   
   r   r   r   r   )r#   r$   rB   r   r"   r   )rD   ZX_initZXt_expectedr   r?   r   r   r   test_numeric_stability{   s
    rI   c                  C   s   t ddddgddt} | t}t ddddgddt} | t}t|rVtttdd dD d	d
	|| t ddddgddt} | t}t|stttdd dD dd
	|
 |
  d S )Nr   r   r   rH   onehot-densec                 S   s   g | ]}t |qS r   r#   rA   .0rD   r   r   r   
<listcomp>   s     z'test_encode_options.<locals>.<listcomp>)r   r   r   r   F)
categoriesZsparse_outputonehotc                 S   s   g | ]}t |qS r   rK   rL   r   r   r   rN      s     T)r   r   r   r   spissparser(   r   r   r"   Ztoarray)r   ZXt_1ZXt_2ZXt_3r   r   r   test_encode_options   s4    

 
 rS   z8strategy, expected_2bins, expected_3bins, expected_5binsr*   r   c                 C   s   t ddddddgdd}td| d	d
}||}t||  td| d	d
}||}t||  td| d	d
}||}t||  d S )Nr   r   r   r   rE   rG   r   r   r   r   r   r      )r#   r$   rB   r   r"   r   Zravel)r   Zexpected_2binsZexpected_3binsZexpected_5binsr   r   r?   r   r   r   test_nonuniform_strategies   s    


rV   zstrategy, expected_invg      r+   g      g      @g      rF   g      g      @g      g      g      g      @g      g      ?g      ?g      @g      g      ?g      g        g      ?r   r   rP   rJ   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   rT   )r   r"   r   inverse_transformr   )r   r   Zexpected_invkbdr?   Xinvr   r   r   test_inverse_transform   s    "

rZ   c                 C   s   t ddddgd d d f }td| dd}|| t dd	gd d d f }||}t|jdd
d |j t|jdd
dg d S )Nr   r   r   r   r*   r   rT   r   rU   )Zaxis)	r#   r$   r   r   r   r   maxr%   min)r   r   rX   ZX2ZX2tr   r   r    test_transform_outside_fit_range   s    

r]   c                  C   s   t ddddgd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgd	gd	gg d S )
Nr   r   r   r   r   rH   r   r	   r   )r#   r$   copyr   r"   r   rW   )r   ZX_beforer   r?   Z	Xt_beforerY   r   r   r   test_overwrite   s    



r_   zstrategy, expected_bin_edgesc              	   C   sd   dgdgdgdgdgdgg}t d| d}d}tjt|d || W 5 Q R X t|jd | d S )Nr   r   )r   r   'Consider decreasing the number of bins.r,   )r   r/   r<   r=   r   r   r5   )r   Zexpected_bin_edgesr   rX   r>   r   r   r   test_redundant_bins   s    ra   c               	   C   s   t dddgdd} t ddddddg}t d	d	d
gdd}tdddd}d}tjt|d ||  W 5 Q R X t|j	d	 | t|
| | d S )Ng?gffffff?r   r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   r*   rG   r   r   r   r`   r,   )r#   r$   rB   r   r/   r<   r=   r   r   r5   r   )r   r7   r?   rX   r>   r   r   r   !test_percentile_numeric_stability
  s    rb   in_dtype	out_dtypec                 C   sr   t jt| d}td||d}|| |d k	r4|}n"|d krP|jt jkrPt j}n|j}||}|j|ksnt	d S N)r&   r   )r   r   r&   )
r#   r$   r   r   r   r&   float16float64r   r(   )rc   rd   r   X_inputrX   Zexpected_dtyper?   r   r   r   test_consistent_dtype  s    

ri   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S re   )	r#   r$   r   r   float32r   r   rg   r   )rj   r   rh   Zkbd_32ZXt_32Zkbd_64ZXt_64r   r   r   test_32_equal_64+  s    



rl   z,ignore:In version 1.3 onwards, subsample=2e5	subsamplewarnc                 C   s   t ddddgdd}tdddd	}|| t|}|j| d
 || t|jd |jd D ]\}}t j	
|| qf|jj|jjkstd S )Nr   r	   r
   r   r   rG   r   r   r   rm   r   )r#   r$   rB   r   r   r   
set_paramsr6   r5   testingZassert_allcloser4   r(   )rm   r   kbd_defaultkbd_with_subsamplingZbin_kbd_defaultZbin_kbd_with_subsamplingr   r   r   'test_kbinsdiscretizer_subsample_default@  s    

 rt   c               	   C   sV   t ddddgdd} tdddd	d
}d}tjt|d ||  W 5 Q R X d S )Nr   r	   r
   r   r   rG   r   r   r   )r   r   r   rm   z4`subsample` must be used with `strategy="quantile"`.r,   )r#   r$   rB   r   r/   r0   r1   r   )r   rX   r2   r   r   r   0test_kbinsdiscretizer_subsample_invalid_strategyS  s
    ru   c               	   C   sP   t jdddd} tdddd}d}tjt|d	 ||  W 5 Q R X d S )
NiA r   r   d   r   r   r   >In version 1.3 onwards, subsample=2e5 will be used by default.r,   )	r#   randomrandrB   r   r/   r<   FutureWarningr   )r   rX   msgr   r   r   $test_kbinsdiscretizer_subsample_warn]  s
    r|   c               	   C   s   t jdddd} tdddd}t|}|jtdd	 d
}tj	t
|d ||  W 5 Q R X ||  t |jd |jd krt|jj|jjkstd S )Ni`[ r   r   rG   r   r   r   g     jAro   rw   r,   r   )r#   rx   ry   rB   r   r   rp   r'   r/   r<   rz   r   allr5   r(   r4   )r   rr   rs   r{   r   r   r   &test_kbinsdiscretizer_subsample_valuesg  s    
r~   zencode, expected_namesc                 C   s.   g | ]&}t d D ]}d| dt| qqS r*   feat_rangefloatrM   col_idZbin_idr   r   r   rN   |  s   
 rN   c                 C   s.   g | ]&}t d D ]}d| dt| qqS r   r   r   r   r   r   rN     s   
 c                 C   s   g | ]}d | qS r   r   )rM   r   r   r   r   rN     s     c                 C   s   dddgdddgdddgdd	dgg}t d	| d
|}||}dd tdD }||}|jd |jd ksttt|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    r   r   r
   r   r   r   r   r   r*   rH   c                 S   s   g | ]}d | qS r   r   rL   r   r   r   rN     s     z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>N)r   r   r   r   Zget_feature_names_outr4   r(   r   )r   Zexpected_namesr   rX   r?   Zinput_featuresZoutput_namesr   r   r   *test_kbinsdiscrtizer_get_feature_names_outw  s    $

r   )-r/   numpyr#   Zscipy.sparsesparserQ   r:   Zsklearnr   Zsklearn.preprocessingr   r   Zsklearn.utils._testingr   r   r   r   markZparametrizer    r)   r3   r8   r@   rC   r   rI   rS   rV   rZ   r]   r_   ra   rb   rf   rk   rg   ri   rl   filterwarningsrt   ru   r|   r~   r   r   r   r   r   <module>   s   ,...
&...



...














 
 
	


	