U
    md                     @   s  d Z ddlmZ ddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ dd Zdd Zg Zg Zd	D ]Zed
dD ]\ZZeeeeZdededed  d dfZddddgfZeeeD ]6\ZZ e!eee eef e!e de de  qqqtej"j#ej"j$deedej"$dddgej"$dddgej"$dddgej"$dddgej"$dddgdd Z%ej"$d ddgd!d" Z&ej"$d#d$d%gd$d%d&ggd'd( Z'ej"$d#d$d%gd$d%d&ggd)d* Z(d+d, Z)d-d. Z*ej+d/d0 Z,d1d2 Z-d3d4 Z.d5d6 Z/d7d8 Z0ej"$d9d:d;d<gej"$dddgej"$d=d>d?d	gej"$d@ddgej"$dAddgdBdC Z1ej"$dDddgej"$dEddddddFdGgdddddgddFddFdgfdddGdFdddgdddddgddFdFddgfdddGddFddgdddddgddFdFddgfgdHdI Z2ej+dJdK Z3ej"$dLddddMdddgdNdOdPgfddddMdddgdNdQdPgfddddMdddgdNdRdSgfddddTdUdVdVgdNdRdSgfgdWdX Z4ej+dYdZ Z5ej"$d[dddddFdd\d]d^ddGg	dUdUd_dVdVdVdVd_d_g	fdddddFdddGgdUdUd_d_d_d_gfdddddd\d]d^gdUdUdVdVdVdVgfdddddgdUdUd_gfgd`da Z6ej+dbdc Z7ej"$dddddgej8dedfdggdhdidjgdkfdddddgeedgedldmdngedodpej9ggddddgddddgddddggdhdidjgdqfgej"$dddgdrds Z:ej"$d@ddgej"$dtddgej"$duddMej;ddddddddddddgej<dvfddTe;dUdVdVdwdwdwdUdUdwdwdwdwgfgdxdy Z=dzd{ Z>ej"$d@ddgej"$duddMej;ddddddddddddgej<dvfddTe;dUdVdVdwdwdwdUdUdwdwdwdwgfgd|d} Z?ej"$d@ddgej"$duddMej;ddddddddddddddddddgej<dvfddTe;dUdVdVdwdwdwdUdUdwdwdwdwdwdwdwdwdwdwgfgd~d Z@ej"$d@ddgej"$ddddddddddddddgfddddddgfgej"$duddMej;ddddddddddddgej<dvfddTe;d_dwd_dwd_dwd_dwd_dwdwdwgfgdd ZAej"$d@ddgej"$dtddgej"$duddMej;ddddddddddddgej<dvfddTe;dUdVdVdwdwdwdUdUdwdwdwdwgfgdd ZBej"$dddMdddgfddTdUdUd_gfgdd ZCej"$ddeDdddddddgfdeDddg ddddddgfgej"$d@ddgdd ZEej"$dddgdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKej"$d ddgdd ZLdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    )productN)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetimec                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer	   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r!   _/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/pandas/tests/groupby/test_value_counts.py.tests_value_counts_index_names_category_column   s    
r#   c                 C   s   t jd tddd}tt jtd|t j||t jd|d |d}| r|d d	|d< t j	|j
dd d
df< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< |S )Ni  z
2015-08-24
   )Zperiodsabcdr   )1st2nd3rdr(   float   r&         r'            	   )nprandomseedr
   r   choicelistrandintr   nanloc)	seed_nansnmdaysframer!   r!   r"   seed_df/   s     r>   )TF)d   i  )      r@   r(   r      r&   r'   -zdf, keys, bins, n, m)idsisortTFznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   dd }|||	|
|d}| j ||d}|d jf |}| j ||d}|d jtjf|}|jjd d dg |j_||}t|||f\}}t	|
 |
  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r5   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrK   )r   Zarrr!   r!   r"   rebuild_index^   s    z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerG   rH   rI   binsrG   r(   )r   r   applyr	   r   rK   renamerL   r   r   
sort_index)r   keysrQ   r:   r;   rE   rP   r   rG   rH   rI   rO   kwargsgrleftrightr!   r!   r"    test_series_groupby_value_countsT   s    

r\   utcc              	   C   s   t dddddddgddd	d	d
d
dgddg}t|d | dd|d< |tddd}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NGI])J]鍙J]K])<M]U=M]驍N]applebananaorangepear	TimestampFoodr+   rj   sr]   unitDatetime1Dfreqkeyrk   r   )r   dropr   r   r   r   rV   rT   r	   r   rK   rU   r   r   )r]   r   dfgr   r    r!   r!   r"   -test_series_groupby_value_counts_with_groupery   s*    	
rv   r   ABCc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rS   r   )dtyper   rJ   )r   r   r   r	   rz   r   rN   lenr   r   r   r   r   ru   r   r    r!   r!   r"   &test_series_groupby_value_counts_empty   s    
r}   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rS   )r   rM   r{   r   r   r   r   r|   r!   r!   r"   (test_series_groupby_value_counts_one_row   s
    r   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r   Zorderedrz   r   r~   r   r   )r	   r   r   r   r   rN   r1   arrayr   r   r   )rl   r   r    r!   r!   r"   /test_series_groupby_value_counts_on_categorical   s"       r   c               	   C   s   t ddddddgddddddgddddddgd} | jd	d
gddd }|jdd}tddgddgdddggdddddgdddddgdddddggd	d
dgd}tdddddg|dd}t|| d S )Nmaler   lowmediumhighr   FRr   	educationr   r   r   FrR   r   r   r   rB   levelscodesrK   r   r   )r   r   r   r   r	   r   r   )r   gbr   r   r    r!   r!   r"   (test_series_groupby_value_counts_no_sort   s    &r   c                	   C   s4   t ddddddgddddddgddddddgdS )	Nr   r   r   r   r   r   r   r   r   r!   r!   r!   r"   education_df   s    r   c              	   C   s4   | j ddd}tjtdd |  W 5 Q R X d S )Nr   r   axisr   match)r   pytestraisesNotImplementedErrorr   r   gpr!   r!   r"   	test_axis   s    r   c              	   C   s6   |  d}tjtdd |jdgd W 5 Q R X d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   r   r!   r!   r"   test_bad_subset   s    
r   c                 C   s\   |  dddg jdd}tdddddgtjdd	d
ddgdddgddd}t|| d S )Nr   r   r   TrP         ?      ?r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   rF   r   )r   r   r	   r   from_tuplesr   r   )r   r   r    r!   r!   r"   
test_basic   s"    
r   c                 C   s   | | j |||dS )NrP   rG   rH   )r   )r   rW   rP   rG   rH   r!   r!   r"   _frame_value_counts  s    r   r   columnr   functionzsort, ascending)FN)TTas_indexr=   c                    s  d d j  fddd| } j||d}	|	ddg j|||d}
|r|	tddg|||}|rrt|
| n|rzd	nd
}| jd|idd}|dkr|jddidd}t	
|d dd|d< n0|dkr|d dk|d< nt	
|d dd|d< t|
| n d d  d   d< |	d j|||d}||_|r|jjdd}|d jdjd|d< |d jdjd|d< |d= |jdd idd}t||_t|
| nV|dd|d jdjd |dd|d jdjd |d= t|
| d S )Nr   c                    s    d |  dkS )Nr   r   r!   )xr   r!   r"   <lambda>+      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>)r   r   r   )byr   r   r   r   rF   r   r   r   r   r   level_0r   r   r   rC   ZbothFr   rB   )valuesr   r   rT   r   r   r   reset_indexrU   r1   whereassert_frame_equalr   r   Zto_framestrsplitgetr   r   insert)r   r   rP   r   rG   rH   r   r=   r   r   r   r    Zindex_framer!   r   r"   $test_against_frame_and_seriesgroupby  sd    
        ""r   rP   zCsort, ascending, expected_rows, expected_count, expected_group_sizer+      c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q2|rn||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)r   rG   r   r   )r   r   r   c                    s   g | ]}  | qS r!   r!   .0rowr   r   r!   r"   
<listcomp>t  s     z!test_compound.<locals>.<listcomp>rF   r   )r   r   r   r   r   )
r   rP   rG   rH   expected_rowsZexpected_countZexpected_group_sizer   r   r    r!   r   r"   test_compound[  s      r   c                   C   s4   t ddddgddddgddddgdddd	d
gdS )Nr   rB   r      r   )rs   num_legs	num_wingsZfalcondogcatantr   r   r!   r!   r!   r"   
animals_df}  s    "
r   z?sort, ascending, normalize, name, expected_data, expected_indexr   )r   r   r   )rB   r   r   )rB   r   r   )rB   r   r   )r   rB   r   )r   rB   r   rF   r   r   c           
      C   s`   | j |||d}t|tj|dddgd|d}t|| | dj |||d}	t|	| d S )N)rG   rH   rP   rs   r   r   rJ   r   )r   r	   r   rN   r   r   r   )
r   rG   rH   rP   r   expected_dataexpected_indexresult_framer    result_frame_groupbyr!   r!   r"   test_data_frame_value_counts  s(       
  r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r+   rB   r@   r/   r-   )rw   rx   ry   D)r1   r7   r   )r:   r!   r!   r"   nulls_df  s    r   z:group_dropna, count_dropna, expected_rows, expected_valuesr-   r   r/   g      ?c           
         st   j ddg|d}|jdd|d}t }jD ]  fdd|D | < q.t|}t||dd	}	t||	 d S )
Nrw   rx   )rI   T)rP   rG   rI   c                    s   g | ]}  | qS r!   r!   r   r   r   r!   r"   r     s     z,test_dropna_combinations.<locals>.<listcomp>rF   r   )	r   r   r   r   r   r   r	   r   r   )
r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r   r   r    r!   r   r"   test_dropna_combinations  s    

r   c                 C   s(   t ddddgddddgd| | dgdS )Nr   JohnAnneBethSmithLouise)rs   
first_namemiddle_namer   )Znulls_fixturer!   r!   r"   names_with_nulls_df  s    


r   z%dropna, expected_data, expected_indexr   r   )r   r   )r   r   rs   r   r   rJ   r   r   r   r   r   r   c           	      C   s`   | j ||d}t|||d}|r0|tt| }t|| | dj ||d}t|| d S )N)rI   rP   r   rs   )r   r	   r)   r{   r   r   r   )	r   rI   rP   r   r   r   r   r    r   r!   r!   r"   #test_data_frame_value_counts_dropna  s    !
 r   observedznormalize, name, expected_datarz           c                 C   s   |  djd||d}|j|d}tjddddd	d
ddddddgdddgd}t|||d}	tdD ]"}
|	jjt	|	jj
|
 |
d|	_qd|rt||	 n |	j|rdndd}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r   r+   levelrF   r   r   )r   r   r   r   r   r	   rM   r   
set_levelsr   r   r   r   r   r   r   r   r   rP   r   r   r   r   r   expected_seriesir    r!   r!   r"   =test_categorical_single_grouper_with_only_observed_categories  sL    
   

r   c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|dddgd|d	}	t	d
D ]@}
t
|	jj|
 }|
dkr|| d jj}|	jj||
d|	_qf|rt||	 n|	j|d}t|| d S )Nr   r   ASIAr   r   r   r   rJ   r   r+   r   r   r   )copyr   r   Zadd_categoriesr   r   r	   r   r   rM   r   r   r   Zset_categoriesr   r   r   r   r   r   )r   r   r   r   rP   r   r   r   r   r   r   Zindex_levelr    r!   r!   r"   !assert_categorical_single_grouperT  s.    
r   c                 C   s6   ddddddddd	d
ddg}t | |d||||d d S )Nr   r   r   r   r   r   r   r   r   r   r   r   Tr   r   r   r   rP   r   r   r   r   r   rP   r   r   r   r!   r!   r"   -test_categorical_single_grouper_observed_truew  s,    r   c                 C   sB   ddddddddd	d
ddddddddg}t | |d||||d d S )Nr   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   Fr   r   r   r!   r!   r"   .test_categorical_single_grouper_observed_false  s8    -r   zobserved, expected_index)r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r^||dk n|tj|dddgd|d	}	td
D ]"}
|	jj	t
|	jj|
 |
d|	_q|rt||	 n |	j|rdndd}t|| d S )Nr   r   r   r   r   r   r   rJ   r   rB   r   rF   r   r   r   r   r   r   r	   r   r   rM   r   r   r   r   r   r   r   r   )r   r   r   r   rP   r   r   r   r   r   r   r    r!   r!   r"   "test_categorical_multiple_groupers  s8    7   

r   c                 C   s   |   } | d d| d< | d d| d< | jd||d}|j|d}ddd	d
ddddddddg}t|tj|dddgd|d}	tddD ]"}
|	jj	t
|	jj|
 |
d|	_q|rt||	 n |	j|rdndd}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r   r   r+   r   rF   r   r   r   r   r!   r!   r"   test_categorical_non_groupersH  sJ     

r  z*normalize, expected_label, expected_valuesc                 C   s   t dddgdddgd}|jdddgddd	 gd
d}|jd| d}t dtjdddgtjdddddgddddgddddg||i}t|| d S )Nr   rB   r+   )rw   rx   r   r@   rw   c                 S   s   | dkrdS dS )Nr   r-   r/   r!   )r   r!   r!   r"   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   T)rG   rP   r   r   Zlevel_2r/   r-   rx   )r   r   r   r1   r   int_r   r   )rP   expected_labelr   r   r   r   r    r!   r!   r"   test_mixed_groupings  s"    	     	r  ztest, columns, expected_namesrepeatZabbder   dr   er   r%   level_1cc           
      C   s   t dddddgdddd	d
gg|d}ddg}dtjddgtjddg}|j||d }|rtdtj||ddd}t	
|| n@dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )Nr   r+   r@   r-   r0   rB   r   r   r/   r$   r   )r   r   r-   r+   r@   r0   )rB   r   r/   r   r   r$   r   r   r   r  r  r   rJ   r   r   c                 S   s   g | ]}t |d g qS )r   )r5   r   r!   r!   r"   r     s     z0test_column_label_duplicates.<locals>.<listcomp>r	  )r   r1   r   int64r   r   r	   r   r   r   r   r5   appendr   )
testr   Zexpected_namesr   r   r   rW   r   r    Zexpected_columnsr!   r!   r"   test_column_label_duplicates  s(    $
r  znormalize, expected_labelc              	   C   sZ   t dddggdd|gdjddd}d	| d
}tjt|d |j| d W 5 Q R X d S )Nr   rB   r+   r   r   r   Fr  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rP   r  r   msgr!   r!   r"   test_result_label_duplicates  s    	 r  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr   r   r   rB   rJ   r   r   )r   r   r1   r   r  r   r	   r   r   r   r   )r   r   r   r    r!   r!   r"   test_ambiguous_grouping  s      r  c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r
  r   yc1c2r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r  r   r   r   r   r   r   r   r   r  r!   r!   r"   "test_subset_overlaps_gb_key_raises  s    $r  c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r
  r   r  r  r   r   r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r  c3r   r  r  r!   r!   r"   !test_subset_doesnt_exist_in_frame  s    $r  c                  C   sv   t dddgdddgddddgd	} | jdd
jdgd}tddgtjddgddggd dgddd}t|| d S )Nr   r   r
  r   r  r  r   r   r   r   r  r   rB   rJ   r   r   r   r   r   r	   r   rN   r   r   r   r   r    r!   r!   r"   test_subset  s    $r  c                  C   s   t dddgdddgdddggdddgdddgd	} | jdd
jdgd}tddgtjddgddgddggd ddgddd}t|| d S )Nr   r   r   r  r   r   r  r  )r   r   r   r   rB   rJ   r   r   r  r  r!   r!   r"   test_subset_duplicate_columns  s     r  c              
   C   s   t dddddddgddd	d	d
d
dgddg}t|d | dd|d< |tddd}| }tddddg| d}|d  }t||dd	d
dggddddddgtdddddddggdddgd}t	d|dd }t
|| d S )!Nr^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   r+   rj   rl   rm   ro   rp   rq   z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r]   r   r   rB   r   rk   r   r   r   )r   rt   r   r   r   r   uniquer   rM   r	   r   r   )r]   r   r   r   datesZ
timestampsr   r    r!   r!   r"   test_value_counts_time_grouper  s:    	
 $r!  )M__doc__	itertoolsr   numpyr1   r   Zpandasr   r   r   r   r   r   r	   r
   r   Zpandas._testingZ_testingr   r#   r>   ZbinnedrD   r9   r:   r;   r   ZarangemaxrQ   rW   kr   r  markZslowZparametrizer\   rv   r}   r   r   r   Zfixturer   r   r   r   r   r   r   r   r   r   r   r   rN   r7   r   r   r  r   r   r   r   r   r  r  r5   r  r  r  r  r  r  r  r!  r!   r!   r!   r"   <module>   s  ,$ 




>***


""


	
 & 0#& & &% & %& 0


