U
    mdY                     @   sv  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.ddddddgZ/ddddddgZ0ddgddgddggZ1dddgZ2dddgZ3e*4 Z5e-6e5j7j8Z9e&e5j:e5j7e-d\e5_:e5_7e*; Z<e&e<j:e<j7e-d\e<_:e<_7dd  Z=d!d" Z>ej?@d#d$d%gd&d' ZAd(d) ZBd*d+ ZCej?@d,d-d.d/gd0d1 ZDej?@d#d$d%gd2d3 ZEd4d5 ZFd6d7 ZGd8d9 ZHd:d; ZId<d= ZJd>d? ZKd@dA ZLdBdC ZMdDdE ZNdFdG ZOej?@d#d$d%gdHdI ZPdJdK ZQej?@d#d$d%gdLdM ZRej?@dNe e5j:e5j7fe e<j:e<j7fgdOdP ZSdQdR ZTej?@dSee#fee$fgdTdU ZUej?@dVeegdWdX ZVdYdZ ZWdS )[z6Testing for the boost module (sklearn.ensemble.boost).    N)
csc_matrix)
csr_matrix)
coo_matrix)
dok_matrix)
lil_matrix)assert_array_equalassert_array_less)assert_array_almost_equal)BaseEstimator)clone)DummyClassifierDummyRegressor)LinearRegression)train_test_split)GridSearchCV)AdaBoostClassifier)AdaBoostRegressor)_samme_proba)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)datasets      foo   random_statec                     s   t dddgdddgddd	gddd
gg  t  jddd d t jf   G  fddd} |  }t|dt  }t|j j t 	|
 sttt j|ddddddg tt j|ddddddg d S )Nr   gư>r   gRQ?g333333?皙?igRQ?g      ?g&.>Zaxisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXZprobs d/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s    z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r-   r+   r*   r+   r,   MockEstimatorB   s   r1   r    r   )nparrayabssumnewaxisr   	ones_liker   r&   isfiniteallAssertionErrorZargminargmax)r1   ZmockZsamme_probar+   r*   r,   test_samme_proba7   s    "$r<   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r2   Zoneslenr)   r   fitr	   r-   )Zy_tclfr+   r+   r,   test_oneclass_adaboost_probaT   s    r@   	algorithmSAMMESAMME.Rc                 C   sz   t | dd}|tt t|tt tt	t
t|j |tjttdfks\t|tjttfksvtd S )Nr   rA   r"   r   )r   r>   r)   y_classr   predictT	y_t_classr2   uniqueasarrayclasses_r-   r&   r=   r:   decision_function)rA   r?   r+   r+   r,   test_classification_toy]   s    rM   c                  C   s*   t dd} | tt t| tt d S )Nr   r!   )r   r>   r)   y_regrr   rF   rG   y_t_regrr?   r+   r+   r,   test_regression_toyh   s    
rQ   c                  C   s  t tj} d  }}dD ]}t|d}|tjtj t| |j |	tj}|dkr^|}|}|j
d t| kstt|tjj
d t| kst|tjtj}|dkstd||f t|jdkstttdd |jD t|jkstqd	|_td
t |	tj|  d S )NrB   rC   rA   rB   r   g?z'Failed with algorithm %s and score = %fc                 s   s   | ]}|j V  qd S r%   r!   .0Zestr+   r+   r,   	<genexpr>   s     ztest_iris.<locals>.<genexpr>rC   r   )r2   rI   iristargetr   r>   datar   rK   r-   r&   r=   r:   rL   scoreestimators_setrA   r   r4   )classesZ	clf_sammeZ
prob_sammealgr?   probarZ   r+   r+   r,   	test_iriso   s(    
r`   lossZlinearZsquareZexponentialc                 C   st   t | dd}|tjtj |tjtj}|dks8tt|jdksJttt	dd |jD t|jksptd S )Nr   )ra   r"   g?r   c                 s   s   | ]}|j V  qd S r%   r!   rT   r+   r+   r,   rV      s     z test_diabetes.<locals>.<genexpr>)
r   r>   diabetesrY   rX   rZ   r:   r=   r[   r\   )ra   regrZ   r+   r+   r,   test_diabetes   s    rd   c                 C   s  t jd}|jdtjjd}|jdtjjd}t| dd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}	d	d |jtj
tj|dD }
t|dkstt||d
  t|dkstt||d
  t|
dkstt|	|
d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}	dd |jtj
tj|dD }
t|dkstt||d
  t|
dkstt|	|
d
  d S )Nr   
   size)rA   n_estimatorssample_weightc                 S   s   g | ]}|qS r+   r+   rU   pr+   r+   r,   
<listcomp>   s     z'test_staged_predict.<locals>.<listcomp>c                 S   s   g | ]}|qS r+   r+   rk   r+   r+   r,   rm      s     c                 S   s   g | ]}|qS r+   r+   rU   sr+   r+   r,   rm      s    r   )rh   r"   c                 S   s   g | ]}|qS r+   r+   rk   r+   r+   r,   rm      s     c                 S   s   g | ]}|qS r+   r+   rn   r+   r+   r,   rm      s   )r2   randomRandomStaterandintrW   rX   r&   rb   r   r>   rY   rF   staged_predictr-   staged_predict_probarZ   staged_scorer=   r:   r	   r   )rA   rngZiris_weightsZdiabetes_weightsr?   ZpredictionsZstaged_predictionsr_   Zstaged_probasrZ   Zstaged_scoresr+   r+   r,   test_staged_predict   sF      rw   c                  C   sh   t t d} dddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N)	estimator)r   r   rR   )rh   estimator__max_depthrA   r   rx   r"   )rh   ry   )
r   r   r   r>   rW   rY   rX   r   r   rb   )boost
parametersr?   r+   r+   r,   test_gridsearch   s    


r}   c                  C   s   dd l } dD ]p}t|d}|tjtj |tjtj}| |}| |}t	||j
ks`t|tjtj}||kstqtdd}|tjtj |tjtj}| |}| |}t	||j
kst|tjtj}||kstd S )Nr   rR   rS   r!   )pickler   r>   rW   rY   rX   rZ   dumpsloadstype	__class__r:   r   rb   )r~   r^   objrZ   ro   obj2Zscore2r+   r+   r,   test_pickle   s$    





r   c               	   C   s~   t jdddddddd\} }dD ]X}t|d	}|| | |j}|jd dksRt|d dtjf |dd  k	 s tq d S )
Ni  re   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r"   rR   rS   )
r   Zmake_classificationr   r>   feature_importances_r&   r:   r2   r6   r9   )r)   yr^   r?   Zimportancesr+   r+   r,   test_importances   s    


r   c               	   C   sF   t  } td}tjt|d | jttt	
dgd W 5 Q R X d S )Nz*sample_weight.shape == (1,), expected (6,)matchr   ri   )r   reescapepytestraises
ValueErrorr>   r)   rE   r2   rJ   )r?   msgr+   r+   r,   ,test_adaboost_classifier_sample_weight_error  s    
r   c               	   C   s   ddl m}  t|  }|tt tt dd}|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}dd	dd
g}tt dd}tjtdd ||| W 5 Q R X d S )Nr   )RandomForestClassifierrB   rS   )RandomForestRegressorr!   r   r   barr   zworse than randomr   )sklearn.ensembler   r   r>   r)   rN   r   rE   r   r   r   r   r   r   )r   r?   r   ZX_failZy_failr+   r+   r,   test_estimator  s    
r   c               	   C   s@   d} t dddd}tjt| d |tjtj W 5 Q R X d S )Nz+Sample weights have reached infinite values   g      7@rB   )rh   Zlearning_raterA   r   )r   r   warnsUserWarningr>   rW   rY   rX   )r   r?   r+   r+   r,   test_sample_weights_infinite6  s    r   c                  C   s<  G dd dt } tjddddd\}}t|}t||dd	\}}}}tttt	t
fD ]}||}||}	t| d
dddd||}
t| d
dddd||}|
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	|}|||}t|| |
|	}||}t||D ]\}}t|| qZ|
|	}||}t||D ]\}}t|| q|
|	}||}t||D ]\}}t|| q|
|	|}|||}t||D ]\}}t|| qdd |
jD }tdd |D sTtqTd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S z<Modification on fit caries data type for later verification.ri   superr>   r   
data_type_r(   r)   r   rj   r   r+   r,   r>   C  s    
z1test_sparse_classification.<locals>.CustomSVC.fit)Nr.   r/   r0   __doc__r>   __classcell__r+   r+   r   r,   	CustomSVC@  s   r   r         *   )Z	n_classesr   r   r"   r   r!   T)ZprobabilityrB   )rx   r"   rA   c                 S   s   g | ]
}|j qS r+   r   rU   ir+   r+   r,   rm     s     z.test_sparse_classification.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r+   r   r   rU   tr+   r+   r,   rm     s     )r   r   Zmake_multilabel_classificationr2   Zravelr   r   r   r   r   r   r   r>   rF   r   rL   r	   Zpredict_log_probar-   rZ   Zstaged_decision_functionziprs   rt   ru   r[   r9   r:   )r   r)   r   X_trainX_testy_trainy_testsparse_formatX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_resultsdense_results
sprase_res	dense_restypesr+   r+   r,   test_sparse_classification=  sz    	   

  


















r   c                  C   s
  G dd dt } tjddddd\}}t||dd	\}}}}tttttfD ]}||}||}	t	|  dd

||}
t	|  dd

|| }}|
|	}||}t|| |
|	}||}t||D ]\}}t|| qdd |
jD }tdd |D sJtqJd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S r   r   r   r   r+   r,   r>     s    
z-test_sparse_regression.<locals>.CustomSVR.fit)Nr   r+   r+   r   r,   	CustomSVR  s   r   r   2   r   r   )r   r   	n_targetsr"   r   r!   rz   c                 S   s   g | ]
}|j qS r+   r   r   r+   r+   r,   rm     s     z*test_sparse_regression.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r+   r   r   r+   r+   r,   rm     s     )r   r   Zmake_regressionr   r   r   r   r   r   r   r>   rF   r	   rs   r   r[   r9   r:   )r   r)   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_sparse_regression  sD    	   
    




r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jksBtdS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r%   r+   )r(   r)   r   r+   r+   r,   r>     s    zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r2   Zzerosr&   r'   r+   r+   r,   rF     s    zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r.   r/   r0   r>   rF   r+   r+   r+   r,   DummyEstimator  s   r   r    )rh   N)	r
   r   r>   r)   rN   r=   Zestimator_weights_Zestimator_errors_r:   )r   r{   r+   r+   r,   %test_sample_weight_adaboost_regressor  s    r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   r   r    r   Zmost_frequent)ZstrategyN)r2   rp   rq   Zrandnchoicer   r   r>   rF   r-   r   r   )rv   r)   Zycyrr{   r+   r+   r,   test_multidimensional_X  s    



r   c              	   C   s\   t jt j }}tt }t|| d}d|jj}t	j
t|d ||| W 5 Q R X d S )N)rx   rA   z {} doesn't support sample_weightr   )rW   rY   rX   r   r   r   formatr   r.   r   r   r   r>   )rA   r)   r   rx   r?   err_msgr+   r+   r,   -test_adaboostclassifier_without_sample_weight  s    
r   c            
      C   sR  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k s,t||	k s:t|t|	ksNtd S )Nr   r   d     )numg?r#   g-C6?r   r   re   i'  rx   rh   r"   ri   )r2   rp   rq   ZlinspaceZrandr&   Zreshaper   r   r   r>   r7   rZ   r:   r   Zapprox)
rv   r)   r   Zregr_no_outlierZregr_with_weightZregr_with_outlierrj   Zscore_with_outlierZscore_no_outlierZscore_with_weightr+   r+   r,   $test_adaboostregressor_sample_weight  s0       
r   c                 C   sZ   t tjddddi\}}}}t| dd}||| ttj||dd|	| d S )NT)Z
return_X_yr"   r   rD   r   r$   )
r   r   Zload_digitsr   r>   r   r2   r;   r-   rF   )rA   r   r   r   r   modelr+   r+   r,    test_adaboost_consistent_predict"  s    
 r   zmodel, X, yc              	   C   sD   t |}d|d< d}tjt|d | j|||d W 5 Q R X d S )Nir   z1Negative values in data passed to `sample_weight`r   ri   )r2   r7   r   r   r   r>   )r   r)   r   rj   r   r+   r+   r,   #test_adaboost_negative_weight_error2  s
    
r   c                  C   s~   t jd} | jdd}| jddgdd}t |d }tdd	d
}t|dd	d}|j|||d t 	|j
 dksztdS )zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r   re   rf   r   r   r   gtDS 'T	re      )	max_depthr"      r   ri   N)r2   rp   rq   normalr   r7   r   r   r>   isnanr   r5   r:   )rv   r)   r   rj   treeZ	ada_modelr+   r+   r,   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsB  s    r   zAdaBoost, Estimatorc              	   C   s^   t ddgddgg}t ddg}| | d}d}tjt|d ||| W 5 Q R X d S )	Nr   r   r       r   )Zbase_estimatorzV`base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.r   )r2   r3   r   r   FutureWarningr>   )AdaBoostZ	Estimatorr)   r   r   warn_msgr+   r+   r,   'test_base_estimator_argument_deprecatedT  s    r   r   c              	   C   s^   t ddgddgg}t ddg}|  }||| d}tjt|d |j W 5 Q R X d S )Nr   r   r    r   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r   )r2   r3   r>   r   r   r   Zbase_estimator_)r   r)   r   r   r   r+   r+   r,   'test_base_estimator_property_deprecatedi  s    r   c               	   C   s4   t t } tjtdd | jdd W 5 Q R X dS )zCheck that setting base_estimator parameters works.

    During the deprecation cycle setting "base_estimator__*" params should
    work.

    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/25470
    zParameter 'base_estimator' ofr   r   )Zbase_estimator__max_depthN)r   r   r   r   r   Z
set_paramsrP   r+   r+   r,   4test_deprecated_base_estimator_parameters_can_be_set|  s    
r   )Xr   numpyr2   r   r   Zscipy.sparser   r   r   r   r   Zsklearn.utils._testingr   r   r	   Zsklearn.baser
   r   Zsklearn.dummyr   r   Zsklearn.linear_modelr   Zsklearn.model_selectionr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearnr   rp   rq   rv   r)   rE   rN   rG   rH   rO   Z	load_irisrW   ZpermutationrX   rg   permrY   Zload_diabetesrb   r<   r@   markZparametrizerM   rQ   r`   rd   rw   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r+   r,   <module>   s   (

  	

"

-]0
	&

	

