U
    mdX                     @   s  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ e Zdd Zdd Zdd Zdd Zej !ddddgdd  Z"ed!d" Z#ej j!d#ej$d$d%d&d%d&gd'd(ej$d%d$d&gd'd(ej$d)gd'd(fej$d*d+d,d+d,ge%d(ej$d+d*d,ge%d(ej$d-ge%d(fe$d*d+d,d+d,ge$d+d*d,ge$d-gfgd'd.d/gd0d1d2 Z&d3d4 Z'ej !dd/d.gd5d6 Z(d7d8 Z)ej j!d9ej$d$d%d&d%d&gd'd(ej$d*d+d,d+d,ge%d(e$d*d+d,d+d,ggd'd.d/gd0d:d; Z*d<d= Z+d>d? Z,d@dA Z-dBdC Z.dDdE Z/dFdG Z0dHdI Z1dJdK Z2dLdM Z3dNdO Z4dPdQ Z5dRdS Z6dTdU Z7dVdW Z8dXdY Z9dZd[ Z:d\d] Z;d^d_ Z<dS )`    N)issparse)
coo_matrix)
csc_matrix)
csr_matrix)
dok_matrix)
lil_matrix)type_of_target)assert_array_equal)ignore_warnings)_to_object_arrayLabelBinarizer)MultiLabelBinarizer)LabelEncoder)label_binarize)_inverse_binarize_thresholding)_inverse_binarize_multiclass)datasetsc                 C   s   t | dr|  } | S )Ntoarray)hasattrr   a r   _/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/sklearn/preprocessing/tests/test_label.pyr      s    
r   c               
   C   s  ddddg} t dd}tddddggj}|| }t|jdg t|| t|||  t dd}|| }t|s~t	t|jdg t||
  t||
 |  t dd}ddddg} tddddggj}|| }t|jddg t|| tddgddgddgddgg}t|||  dd	d
d	dg} tddddgddddgddddgddddgddddgg}|| }t|jdd
d	dg t|| t|||  d S )NposFsparse_outputr   Tneg   ZspamZhameggs0)r   nparrayTfit_transformr	   classes_inverse_transformr   AssertionErrorr   )inplbexpectedgotZ	to_invertr   r   r   test_label_binarizer%   s:    







"4

r,   c               
   C   s   t  } tdddgdddgdddgg}| dddg}t|| tdddgdddgdddgdddgdddgdddgg}| ddddddg}t|| d S )	Nr   r   bder   cf)r   r!   r"   r$   r	   	transform)r)   r*   r+   r   r   r   "test_label_binarizer_unseen_labelsN   s    "
2r3   c               
   C   s   t ddd} tddddg}tddddggj}| |}t|| t| || t ddd} tdddddg}tddddgddddgddddgddddgddddgg}| |}t|| t| || d S )Nr   	neg_label	pos_labelr         )r   r!   r"   r#   r$   r	   r&   )r)   r(   r*   r+   r   r   r   'test_label_binarizer_set_label_encoding\   s&    






	

r:   dtypeZInt64ZFloat64booleanc              
   C   sf   t d}ddlm} |jdddddddddg	| d}| |}|ddg}t|dgdgg dS )ziChecks that LabelBinarizer works with pandas nullable dtypes.

    Non-regression test for gh-25637.
    Zpandasr   r   r   r;   N)pytestZimportorskipZsklearn.preprocessingr   ZSeriesfitr2   r	   )r;   pdr   Zy_truer)   Zy_outr   r   r   $test_label_binarizer_pandas_nullablex   s    
 rA   c               	   C   s  t ddddg} t | }dddg}d}tjt|d || W 5 Q R X t }d}tjt|d |g  W 5 Q R X tjt|d |g  W 5 Q R X ddddg}d	}td
dd}tjt|d || W 5 Q R X d}td
d
d}tjt|d || W 5 Q R X d}tdd
dd}tjt|d || W 5 Q R X d}tjt|d* t	t
dd
gd
dggddd
gdd W 5 Q R X g dd
gdgdddgd
gg}d}tjt|d t | W 5 Q R X d}tjt|d, t	t
dd
gd
dggddd
dgdd W 5 Q R X d}tjt|d2 t	t dd
dgd
ddggddd
dgdd W 5 Q R X d}tjt|d$ t t ddgd
dgg W 5 Q R X tjt|d* tt ddgd
dggdd
dgd W 5 Q R X d S )Nr   r8   r9   )r   )r   r8   z@You appear to be using a legacy multi-label data representation.matchz.This LabelBinarizer instance is not fitted yetr   z3neg_label=2 must be strictly less than pos_label=1.r8   r5   z3neg_label=2 must be strictly less than pos_label=2.zqSparse binarization is only supported with non zero pos_label and zero neg_label, got pos_label=2 and neg_label=1Tr6   r7   r   zfoo format is not supportedZfoo)youtput_typeclasses	thresholdr9   z?You appear to be using a legacy multi-label data representationzAThe number of class is not equal to the number of dimension of y.z!output_type='binary', but y.shapebinaryz@Multioutput target data is not supported with label binarizationrH   )r!   r"   r   r?   r>   raises
ValueErrorr2   r&   r   r   r$   r   )Z	one_classr)   Zmulti_labelerr_msgZinput_labelsZy_seq_of_seqsr   r   r   test_label_binarizer_errors   sv    
(rO   zvalues, classes, unknownr8   r   r9   Zint64r=      r-   r   r0   r.   objectstr)Zidsc              	   C   s   t  }||  t|j| t|| dddddg t|dddddg|  t  }|| }t|dddddg tjt	dd || W 5 Q R X d S )Nr   r   r8   zunseen labelsrC   )
r   r?   r	   r%   r2   r&   r$   r>   rL   rM   )valuesrH   unknownleretr   r   r   test_label_encoder   s    

rW   c               
   C   s   t  } | ddddddg t| jdddddg t| dddddddgdddddddg t| dddddddgdddddddg tt | ddg W 5 Q R X d S )	Nr   rP      r   r8   r9      )	r   r?   r	   r%   r2   r&   r>   rL   rM   )rU   r   r   r    test_label_encoder_negative_ints   s    , r[   c              	   C   sJ   t  }|tjddg| d d}tjt|d |d W 5 Q R X d S )NappleZoranger=   zshould be a 1d arrayrC   )r   r?   r!   r"   r>   rL   rM   r2   )r;   rU   msgr   r   r    test_label_encoder_str_bad_shape  s
    r^   c               	   C   s   t  } tt | g  W 5 Q R X tt | g  W 5 Q R X t  } | dddddg d}tjt|d | dg W 5 Q R X tjt|d | ddd	g W 5 Q R X d
}tjt|d | d W 5 Q R X d S )Nr   r8   r9   rY   z!contains previously unseen labelsrC   r4   z should be a 1d array.+shape \(\) )r   r>   rL   rM   r2   r&   r?   )rU   r]   r   r   r   test_label_encoder_errors  s    rb   rS   c                 C   sH   t  }||  |g }ttg | |g }ttg | d S )N)r   r?   r2   r	   r!   r"   r&   )rS   rU   ZtransformedZinverse_transformedr   r   r   test_label_encoder_empty_array(  s    



rc   c                  C   s  dd dd dd g} t dddgdddgdddgg}| d  }dD ]}| D ]}t|d}|| }t||kszt|r|jj|jjkst|	 }t
|| t
dd	d
g|j |||kstt|d}|| | }t||kst|r|jj|jjkst|	 }t
|| t
dd	d
g|j |||ksPtqPqFtt2 |tt dddgd	ddgdddgg W 5 Q R X d S )Nc                   S   s
   dddgS NrB   r   r   r8   r   r   r   r   r   <lambda>?      z9test_sparse_output_multilabel_binarizer.<locals>.<lambda>c                   S   s   ddhdhddhfS Nr8   r9   r   r   r   r   r   r   rg   @  rh   c                   S   s   t t dt dddhgS NrB   re   r   r8   iterr   r   r   r   rg   A  rh   r   r   TFr   r8   r9   )r!   r"   r   r$   r   r'   indicesr;   Zindptrr   r	   r%   r&   r?   r2   r>   rL   rM   r   )inputsindicator_matinverser   r(   mlbr+   r   r   r   'test_sparse_output_multilabel_binarizer<  s8    "





rs   c                  C   s   dd dd dd g} t dddgdddgdddgg}| d  }| D ]}t }|| }t|| tdddg|j |||kstt }|| 	| }t|| tdddg|j |||ksFtqFd S )	Nc                   S   s
   dddgS rd   r   r   r   r   r   rg   g  rh   z+test_multilabel_binarizer.<locals>.<lambda>c                   S   s   ddhdhddhfS ri   r   r   r   r   r   rg   h  rh   c                   S   s   t t dt dddhgS rj   rk   r   r   r   r   rg   i  rh   r   r   r8   r9   )
r!   r"   r   r$   r	   r%   r&   r'   r?   r2   )ro   rp   rq   r(   rr   r+   r   r   r   test_multilabel_binarizerd  s"    "


rt   c                  C   sF   t  } ddgdgg g}tddgddgddgg}t| || d S )Nr   r8   r   )r   r!   r"   r	   r$   )rr   rF   Yr   r   r   &test_multilabel_binarizer_empty_sample}  s    rv   c               	   C   s   t  } ddgg}tddgddgg}d}tjt|d" | |ddgddgg}W 5 Q R X tdddgdddgg}t dddgd} tjt|d" | |ddgddgg}W 5 Q R X t|| d S )	Nr   r8   r   zunknown class.* will be ignoredrC   rP   r9   rK   )	r   r!   r"   r>   ZwarnsUserWarningr?   r2   r	   )rr   rF   ru   Zwarning_messagematrixr   r   r   'test_multilabel_binarizer_unknown_class  s    
&&ry   c               	   C   sN  dddg} t dddgdddgdddgg}tdddgd}t|| | t|jdddg tdddgd}t|| | | t|jdddg td	dddgd}t|| t dgdgdgg|f t|jd	dddg t	| } tdddgd}t|| | | d
}tddddgd}t
jt|d ||  W 5 Q R X d S )NrB   re   rf   r   r   r9   r8   rK   rP   ztThe classes argument contains duplicate classes. Remove these duplicates before passing them to MultiLabelBinarizer.rC   )r!   r"   r   r	   r$   r%   r?   r2   Zhstackrl   r>   rL   rM   )r(   rp   rr   rN   r   r   r   'test_multilabel_binarizer_given_classes  s,    
" rz   c                  C   s   dddg} t dddgdddgdddgg}t dddgdddgdddgg}tdddgd}t|| | dddg|_t|| | d S )	NrB   re   rf   r   r   r9   r8   rK   )r!   r"   r   r	   r$   rH   )r(   rp   Zindicator_mat2rr   r   r   r   (test_multilabel_binarizer_multiple_calls  s    
""r{   c                  C   s   dgdgdgg} t dddgdddgdddgg}t }t|| | t|||  t }t|| | | t|||  d S )Nr   r   r8   )r!   r"   r   r	   r$   r&   r?   r2   r(   rp   rr   r   r   r   .test_multilabel_binarizer_same_length_sequence  s    "r}   c               	   C   s<  t dddg} dddgddd	gfd
ddgdddgfdddg| fg}tdddgdddgdddgg}|D ]\}}t }tj|td}t||| t|j| tj||td}t|| t }t|	|
|| t|j| tj||td}t|| qht }tt |i i ddifg W 5 Q R X d S )Nre   r8   r9   )23)1)r   r   r   r   r   )r-   r0   r   )r   r-   r   r-   r0   )r~   r   )re   )re   r~   r   r   r=   )r   r!   r"   r   rQ   r	   r$   r%   r&   r?   r2   r>   rL   	TypeError)Ztuple_classesro   rp   r(   rH   rr   Zindicator_mat_invr   r   r   ,test_multilabel_binarizer_non_integer_labels  s*    "
r   c                  C   s0   dg} t ddgg}t }t|| | d S )Nr   r   r   r   r   )r!   r"   r   r	   r$   r|   r   r   r   $test_multilabel_binarizer_non_unique  s    r   c               	   C   s   dg} t  }||  tt |tddgg W 5 Q R X |tddgg |tddgg |tddgg tt |tdgg W 5 Q R X tt |tdddgg W 5 Q R X d S )Nr   r   r9   r   )r   r$   r>   rL   rM   r&   r!   r"   )r(   rr   r   r   r   ,test_multilabel_binarizer_inverse_validation  s    
 r   c               	   C   s   t ddgddddgd} tddddgddddgg}t| | t ddgddddgd} tddddgddddgg}t| | t ddddgddddgd} tddddgddddgddddgddddgg}t| | d S )Nr   rZ   r8   rP   rK   r   r9   )r   r!   r"   r	   )outr*   r   r   r   $test_label_binarize_with_class_order
  s    

2r   c              
   C   s,  dD ] }|dks|dkrH|rHt t t| ||||d W 5 Q R X qt| ||||d}tt|| t||ksxtt| }|dkrt	||d}nt
||||| d d}tt|t|  t|||d}	|	| }tt|| t||kst|	|}
tt|
t|  t|
t| kstqd S )	Nrm   r   rH   r6   r7   r   Z
multiclassrK   g       @)rG   rH   rI   rE   )r>   rL   rM   r   r	   r   r   r'   r   r   r   r   r$   r&   )rF   rH   r7   r6   r*   r   Z	binarizedZy_typeZinversedr)   Zinverse_outputr   r   r   check_binarized_results  sR    

  

r   c                  C   s   dddg} ddg}d}d}t ddgddgddggd d df d}t| |||| dddg} ddg}d}d}t ddgddgddggd d df d}t| |||| d S )Nr   r   r8   rY   )rY   r   r9   )r!   r"   Zreshaper   rF   rH   r7   r6   r*   r   r   r   test_label_binarize_binaryL  s    
.
.r   c               	   C   sf   dddg} dddg}d}d}dt d }t| |||| tt t| |d|dd W 5 Q R X d S )Nr   r   r8   r9   rY   Tr   )r!   eyer   r>   rL   rM   r   r   r   r   r   test_label_binarize_multiclass_  s    

    r   c               	      s   t dddgdddgdddgg dddg} d}d}|  } fddtttttfD } g| D ]}t|| ||| qbt	t
 t|| d|dd W 5 Q R X d S )	Nr   r   r8   c                    s   g | ]}| qS r   r   ).0Zsparse_matrixZy_indr   r   
<listcomp>t  s   z2test_label_binarize_multilabel.<locals>.<listcomp>rY   Tr   )r!   r"   r   r   r   r   r   r   r>   rL   rM   r   )rH   r7   r6   r*   Zy_sparserF   r   r   r   test_label_binarize_multilabeln  s.    "

    r   c                	   C   s   t t tddgddgddd W 5 Q R X t jtdd tddgddgd	 W 5 Q R X t jtd
d tddggdddgd	 W 5 Q R X d S )Nr   r8   r   )rH   r7   r6   zcontinuous target data is not rC   g333333?g@rK   zmismatch with the labelsr9   )r>   rL   rM   r   r   r   r   r   !test_invalid_input_label_binarize  s    "r   c                  C   sF   t tdddgdddgdddggtd} t| tdddg d S )Nr   r   rY   r9   )r   r   r!   Zaranger	   r"   )r+   r   r   r    test_inverse_binarize_multiclass  s
     r   c                  C   s8   t  } | dddtjg | tjg}t|dg dS )z]Check that label encoder encodes nans in transform.

    Non-regression test for #22628.
    r   r-   r8   N)r   r?   r!   nanr2   r	   )rU   Zy_transr   r   r   test_nan_label_encoder  s    r   )=numpyr!   r>   Zscipy.sparser   r   r   r   r   r   Zsklearn.utils.multiclassr   Zsklearn.utils._testingr	   r
   Zsklearn.utilsr   Zsklearn.preprocessing._labelr   r   r   r   r   r   Zsklearnr   Z	load_irisZirisr   r,   r3   r:   markZparametrizerA   rO   r"   rQ   rW   r[   r^   rb   rc   rs   rt   rv   ry   rz   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   )

N


	
(#3	