U
    tdT                     @   s~   d Z ddlZdZG dd dZG dd dZG dd	 d	Zd
d ZdddZdddZdddZ	d ddZ
dd Zdd ZdS )!z$
Statistics related stuff in igraph
    N)FittedPowerLaw	HistogramRunningMeanmeanmedian
percentilequantilepower_law_fitc                   @   s2   e Zd ZdZdd Zdd Zdd Zdd	d
ZdS )r   a  Result of fitting a power-law to a vector of samples

    Example:

        >>> result = power_law_fit([1, 2, 3, 4, 5, 6])
        >>> result                   # doctest:+ELLIPSIS
        FittedPowerLaw(continuous=False, alpha=2.42..., xmin=3.0, L=-7.54..., D=0.21..., p=0.993...)
        >>> print(result)            # doctest:+ELLIPSIS
        Fitted power-law distribution on discrete data
        <BLANKLINE>
        Exponent (alpha)  = 2.42...
        Cutoff (xmin)     = 3.000000
        <BLANKLINE>
        Log-likelihood    = -7.54...
        <BLANKLINE>
        H0: data was drawn from the fitted distribution
        <BLANKLINE>
        KS test statistic = 0.21...
        p-value           = 0.993...
        <BLANKLINE>
        H0 could not be rejected at significance level 0.05
        >>> result.alpha             # doctest:+ELLIPSIS
        2.42...
        >>> result.xmin
        3.0
        >>> result.continuous
        False
    c                 C   s(   || _ || _|| _|| _|| _|| _d S N)
continuousxminalphaLDp)selfr   r   r   r   r   r    r   J/home/sam/Atlas/atlas_env/lib/python3.8/site-packages/igraph/statistics.py__init__4   s    zFittedPowerLaw.__init__c                 C   s&   d| j j| j| j| j| j| j| jf S )Nz6%s(continuous=%r, alpha=%r, xmin=%r, L=%r, D=%r, p=%r))	__class____name__r   r   r   r   r   r   r   r   r   r   __repr__<   s    zFittedPowerLaw.__repr__c                 C   s   | j ddS )N皙?)significance)summaryr   r   r   r   __str__G   s    zFittedPowerLaw.__str__r   c                 C   s   ddt | j  g}|d |d| j  |d| j  |d |d| j  |d |d |d |d| j  |d	| j  |d | j|k r|d
|  n|d|  d|S )a  Returns the summary of the power law fit.

        @param significance: the significance level of the Kolmogorov-Smirnov test
          used to decide whether the input data could have come from the fitted
          distribution
        @return: the summary as a string
        z(Fitted power-law distribution on %s data)discreter    zExponent (alpha)  = %fzCutoff (xmin)     = %fzLog-likelihood    = %fz/H0: data was drawn from the fitted distributionzKS test statistic = %fzp-value           = %fz$H0 rejected at significance level %gz1H0 could not be rejected at significance level %g
)	boolr   appendr   r   r   r   r   join)r   r   resultr   r   r   r   J   s*    	






zFittedPowerLaw.summaryN)r   )r   
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s
   r   c                   @   s   e Zd ZdZd!ddZd"ddZed	d
 Zedd Zedd Z	edd Z
d#ddZdd ZeZdd Zdd Zdd Zd$ddZdd  ZdS )%r   a4  Generic histogram class for real numbers

    Example:

        >>> h = Histogram(5)     # Initializing, bin width = 5
        >>> h << [2,3,2,7,8,5,5,0,7,9]     # Adding more items
        >>> print(h)
        N = 10, mean +- sd: 4.8000 +- 2.9740
        [ 0,  5): **** (4)
        [ 5, 10): ****** (6)
       Nc                 C   s>   t || _d| _d\| _| _t | _|   |r:| | dS )zInitializes the histogram with the given data set.

        @param bin_width: the bin width of the histogram.
        @param data: the data set to be used. Must contain real numbers.
        NNN)	float
_bin_width_bins_min_maxr   _running_meanclearadd_many)r   Z	bin_widthdatar   r   r   r   x   s    
zHistogram.__init__Fc                 C   s*  t | jdkrL|sd}n0t|| j | j | _| j| j | _dg| _d}|S || jkrt|| j | j }|t | jk r||S |sdS |t | j d }| jdg|  | jt | j| j  | _|S |sdS tt| j| | j }dg| | jdd< |  j|| j 8  _| jt | j| j  | _dS )a-  Returns the bin index corresponding to the given number.

        @param num: the number for which the bin is being sought
        @param create: whether to create a new bin if no bin exists yet.
        @return: the index of the bin or C{None} if no bin exists yet and
          {create} is C{False}.r   Nr'   )	lenr+   intr*   r,   r-   extendmathceil)r   numcreater#   binidxZ
extra_binsr   r   r   _get_bin   s2    
zHistogram._get_binc                 C   s
   t | jS )z/Returns the number of elements in the histogram)r2   r.   r   r   r   r   n   s    zHistogram.nc                 C   s   | j jS )z1Returns the mean of the elements in the histogram)r.   r   r   r   r   r   r      s    zHistogram.meanc                 C   s   | j jS )zGReturns the standard deviation of the elements in
        the histogram)r.   sdr   r   r   r   r<      s    zHistogram.sdc                 C   s   | j jS )z5Returns the variance of the elements in the histogram)r.   varr   r   r   r   r=      s    zHistogram.varc                 C   s8   t |}| |d}| j|  |7  < | j|| dS )zAdds a single number to the histogram.

        @param num: the number to be added
        @param repeat: number of repeated additions
        TN)r)   r:   r+   r.   add)r   r7   repeatr9   r   r   r   r>      s    zHistogram.addc                 C   sD   zt |}W n tk
r*   t |g}Y nX |D ]}| | q0dS )zpAdds a single number or the elements of an iterable to the histogram.

        @param data: the data to be addedNiter	TypeErrorr>   )r   r1   iteratorxr   r   r   r0      s    zHistogram.add_manyc                 C   s   g | _ d\| _| _t | _dS )zClears the collected datar(   N)r+   r,   r-   r   r.   r   r   r   r   r/      s    zHistogram.clearc                 c   s2   | j }| jD ] }||| j |fV  || j7 }qdS )zGenerator returning the bins of the histogram in increasing order

        @return: a tuple with the following elements: left bound, right bound,
          number of elements in the binN)r,   r+   r*   )r   rD   elemr   r   r   bins   s    
zHistogram.binsc                 K   s.   ddl m} || ||}|j| f| dS )zPlotting supportr   )DrawerDirectoryN)Zigraph.drawingrG   resolveZdraw)r   backendcontextkwdsrG   Zdrawerr   r   r   __plot__   s    zHistogram.__plot__N   Tc              	   C   s  | j dks| jdkrdS t| j | j kr>t| j| jkr>d}nd}tt|| j  t|| j }dt| |dd  }d||f }|rt| j}|rtt|}||d|  | d	  }	n||d|  d
  }	t|	d}	d| j| j	| j
f g}
|r|	dkr|
d|	  |rR|d7 }|  D ]*\}}}|
|||d||	  |f  q$n2|  D ](\}}}|
|||d||	  f  qZn0|r|  D ] \}}}|
||||f  qd|
S )a  Returns the string representation of the histogram.

        @param max_width: the maximal width of each line of the string
          This value may not be obeyed if it is too small.
        @param show_bars: specify whether the histogram bars should be shown
        @param show_counts: specify whether the histogram counts should be
          shown. If both I{show_bars} and I{show_counts} are C{False},
          only a general descriptive statistics (number of elements, mean and
          standard deviation) is shown.
        NzN = 0z%dz%.3f%r'   z[%s, %s): %%s   	      z N = %d, mean +- sd: %.4f +- %.4fzEach * represents %d itemsz (%d)*r   )r,   r-   r3   r*   maxr2   strr+   r;   r   r<   r!   rF   r"   )r   	max_widthZ	show_barsZshow_countsZnumber_formatZ
num_lengthformat_stringmaxvalZmaxval_lengthscaler#   leftrightZcntr   r   r   	to_string   s>     



"zHistogram.to_stringc                 C   s   |   S r
   )r[   r   r   r   r   r   (  s    zHistogram.__str__)r'   N)F)r'   )rM   TT)r   r$   r%   r&   r   r:   propertyr;   r   r<   r=   r>   r0   
__lshift__r/   rF   rL   r[   r   r   r   r   r   r   k   s&   

%






8r   c                   @   s   e Zd ZdZd!ddZd"ddZd	d
 Zdd Zedd Z	edd Z
edd Zedd Zdd Zdd ZeZdd Zdd Zdd Zdd  ZdS )#r   ax  Running mean calculator.

    This class can be used to calculate the mean of elements from a
    list, tuple, iterable or any other data source. The mean is
    calculated on the fly without explicitly summing the values,
    so it can be used for data sets with arbitrary item count. Also
    capable of returning the standard deviation (also calculated on
    the fly)
    N        c                 C   s   |dk	r<|dks |dks |dkr(t d|   | | nNt|| _t|| _|dkr~t|d t|d  | _t|| _nd| _d| _dS )ag  RunningMean(items=None, n=0.0, mean=0.0, sd=0.0)

        Initializes the running mean calculator.

        There are two possible ways to initialize the calculator.
        First, one can provide an iterable of items; alternatively,
        one can specify the number of items, the mean and the
        standard deviation if we want to continue an interrupted
        calculation.

        @param items: the items that are used to initialize the
          running mean calcuator. If C{items} is given, C{n},
          C{mean} and C{sd} must be zeros.
        @param n: the initial number of elements already processed.
          If this is given, C{items} must be C{None}.
        @param mean: the initial mean. If this is given, C{items}
          must be C{None}.
        @param sd: the initial standard deviation. If this is given,
          C{items} must be C{None}.Nr   z1n, mean and sd must be zeros if items is not Noner'   rO   r^   )
ValueErrorr/   r0   r)   _nitems_mean_sqdiff_sd)r   itemsr;   r   r<   r   r   r   r   7  s    

zRunningMean.__init__r'   c                 C   sx   t |}|  j|7  _|| j }|  j|| | j 7  _|  j|| || j  7  _| jdkrt| j| jd  d | _dS )zRunningMean.add(value, repeat=1)

        Adds the given value to the elements from which we calculate
        the mean and the standard deviation.

        @param value: the element to be added
        @param repeat: number of repeated additions
        r'         ?N)r3   r`   ra   rb   rc   )r   valuer?   deltar   r   r   r>   Z  s    	

zRunningMean.addc                 C   sD   zt |}W n tk
r*   t |g}Y nX |D ]}| | q0dS )a   RunningMean.add(values)

        Adds the values in the given iterable to the elements from
        which we calculate the mean. Can also accept a single number.
        The left shift (C{<<}) operator is aliased to this function,
        so you can use it to add elements as well:

          >>> rm=RunningMean()
          >>> rm << [1,2,3,4]
          >>> rm.result               # doctest:+ELLIPSIS
          (2.5, 1.290994...)

        @param values: the element(s) to be added
        @type values: iterableNr@   )r   valuesrC   rf   r   r   r   r0   k  s    zRunningMean.add_manyc                 C   s   d\| _ | _d\| _| _dS )z#Resets the running mean calculator.)r^   r^   N)r`   ra   rb   rc   r   r   r   r   r/     s    zRunningMean.clearc                 C   s   | j | jfS )z:Returns the current mean and standard deviation as a tuple)ra   rc   r   r   r   r   r#     s    zRunningMean.resultc                 C   s   | j S )zReturns the current mean)ra   r   r   r   r   r     s    zRunningMean.meanc                 C   s   | j S )z&Returns the current standard deviationrc   r   r   r   r   r<     s    zRunningMean.sdc                 C   s
   | j d S )zReturns the current variationrO   ri   r   r   r   r   r=     s    zRunningMean.varc                 C   s   d| j jt| j| j| jf S )Nz%s(n=%r, mean=%r, sd=%r))r   r   r3   r`   ra   rc   r   r   r   r   r     s    zRunningMean.__repr__c                 C   s   d| j | j| jf S )NzRunning mean (N=%d, %f +- %f))r`   ra   rc   r   r   r   r   r     s    zRunningMean.__str__c                 C   s
   t | jS r
   )r)   ra   r   r   r   r   	__float__  s    zRunningMean.__float__c                 C   s
   t | jS r
   )r3   ra   r   r   r   r   __int__  s    zRunningMean.__int__c                 C   s
   t | jS r
   )complexra   r   r   r   r   __complex__  s    zRunningMean.__complex__c                 C   s
   t | jS r
   )r3   r`   r   r   r   r   __len__  s    zRunningMean.__len__)Nr^   r^   r^   )r'   )r   r$   r%   r&   r   r>   r0   r/   r\   r#   r   r<   r=   r   r   r]   rj   rk   rm   rn   r   r   r   r   r   ,  s(   

#




r   c                 C   s
   t | jS )a  Returns the mean of an iterable.

    Example:

        >>> mean([1, 4, 7, 11])
        5.75

    @param xs: an iterable yielding numbers.
    @return: the mean of the numbers provided by the iterable.

    @see: RunningMean() if you also need the variance or the standard deviation
    )r   r   xsr   r   r   r     s    r   Tc                 C   sX   |rt | } tt| d }d| t| krHt| |d  | |  d S t| | S dS )aH  Returns the median of an unsorted or sorted numeric vector.

    @param xs: the vector itself.
    @param sort: whether to sort the vector. If you know that the vector is
      sorted already, pass C{False} here.
    @return: the median, which will always be a float, even if the vector
      contained integers originally.
    rO   r'   N)sortedr3   r2   r)   )rp   sortmidr   r   r   r     s    	r      2   K   c                 C   s0   t |dr t| dd |D |S t| |d |S )a(  Returns the pth percentile of an unsorted or sorted numeric vector.

    This is equivalent to calling quantile(xs, p/100.0); see L{quantile}
    for more details on the calculation.

    Example:

        >>> round(percentile([15, 20, 40, 35, 50], 40), 2)
        26.0
        >>> for perc in percentile([15, 20, 40, 35, 50], (0, 25, 50, 75, 100)):
        ...     print("%.2f" % perc)
        ...
        15.00
        17.50
        35.00
        45.00
        50.00

    @param xs: the vector itself.
    @param p: the percentile we are looking for. It may also be a list if you
      want to calculate multiple quantiles with a single call. The default
      value calculates the 25th, 50th and 75th percentile.
    @param sort: whether to sort the vector. If you know that the vector is
      sorted already, pass C{False} here.
    @return: the pth percentile, which will always be a float, even if the vector
      contained integers originally. If p is a list, the result will also be a
      list containing the percentiles for each item in the list.
    __iter__c                 s   s   | ]}|d  V  qdS )      Y@Nr   ).0rD   r   r   r   	<genexpr>  s     zpercentile.<locals>.<genexpr>ry   )hasattrr   )rp   r   rr   r   r   r   r     s    
r   auto{Gz?c                 C   sV   ddl m} |dks|dk r d}| }|dkr<td| |dk}t|| ||| S )a  Fitting a power-law distribution to empirical data

    @param data: the data to fit, a list containing integer values
    @param xmin: the lower bound for fitting the power-law. If C{None},
      the optimal xmin value will be estimated as well. Zero means that
      the smallest possible xmin value will be used.
    @param method: the fitting method to use. The following methods are
      implemented so far:

        - C{continuous}, C{hill}: exact maximum likelihood estimation
          when the input data comes from a continuous scale. This is
          known as the Hill estimator. The statistical error of
          this estimator is M{(alpha-1) / sqrt(n)}, where alpha is the
          estimated exponent and M{n} is the number of data points above
          M{xmin}. The estimator is known to exhibit a small finite
          sample-size bias of order M{O(n^-1)}, which is small when
          M{n > 100}. igraph will try to compensate for the finite sample
          size if n is small.

        - C{discrete}: exact maximum likelihood estimation when the
          input comes from a discrete scale (see Clauset et al among the
          references).

        - C{auto}: exact maximum likelihood estimation where the continuous
          method is used if the input vector contains at least one fractional
          value and the discrete method is used if the input vector contains
          integers only.
    @param p_precision: desired precision of the p-value calculation. The
      precision ultimately depends on the number of resampling attempts. The
      number of resampling trials is determined by 0.25 divided by the square
      of the required precision. For instance, a required precision of 0.01
      means that 2500 samples will be drawn.

    @return: a L{FittedPowerLaw} object. The fitted C{xmin} value and the
      power-law exponent can be queried from the C{xmin} and C{alpha}
      properties of the returned object.

    @newfield ref: Reference
    @ref: MEJ Newman: Power laws, Pareto distributions and Zipf's law.
      Contemporary Physics 46, 323-351 (2005)
    @ref: A Clauset, CR Shalizi, MEJ Newman: Power-law distributions
      in empirical data. E-print (2007). arXiv:0706.1062
    r   )_power_law_fitN)r   hillr   r}   zunknown method: %s)r   r   )Zigraph._igraphr   lowerr_   r   )r1   r   methodZp_precisionr   Zforce_continuousr   r   r   r	     s    ,r	   g      ?re   g      ?c           	      C   s   | st d|rt| } t|dr,|}d}n
|g}d}g }|D ]}|dk sR|dkrZt dt|t| d  }t||t|  }}|t| kr|| d  q>|dk r|| d  q>|d| | |d   || |    q>|r|d }|S )	a  Returns the qth quantile of an unsorted or sorted numeric vector.

    There are a number of different ways to calculate the sample quantile. The
    method implemented by igraph is the one recommended by NIST. First we
    calculate a rank n as q(N+1), where N is the number of items in xs, then we
    split n into its integer component k and decimal component d. If k <= 1,
    we return the first element; if k >= N, we return the last element,
    otherwise we return the linear interpolation between xs[k-1] and xs[k]
    using a factor d.

    Example:

        >>> round(quantile([15, 20, 40, 35, 50], 0.4), 2)
        26.0

    @param xs: the vector itself.
    @param q: the quantile we are looking for. It may also be a list if you
      want to calculate multiple quantiles with a single call. The default
      value calculates the 25th, 50th and 75th percentile.
    @param sort: whether to sort the vector. If you know that the vector is
      sorted already, pass C{False} here.
    @return: the qth quantile, which will always be a float, even if the vector
      contained integers originally. If q is a list, the result will also be a
      list containing the quantiles for each item in the list.
    zxs must not be emptyrx   FTr   r'   zq must be between 0 and 1r   )r_   rq   r|   r)   r2   r3   r!   )	rp   qrr   qsZreturn_singler#   r;   kdr   r   r   r   2  s.    
(r   c                 C   s
   t | jS )a9  Returns the standard deviation of an iterable.

    Example:

        >>> sd([1, 4, 7, 11])       #doctest:+ELLIPSIS
        4.2720...

    @param xs: an iterable yielding numbers.
    @return: the standard deviation of the numbers provided by the iterable.

    @see: RunningMean() if you also need the mean
    )r   r<   ro   r   r   r   r<   j  s    r<   c                 C   s
   t | jS )a.  Returns the variance of an iterable.

    Example:

        >>> var([1, 4, 8, 11])            #doctest:+ELLIPSIS
        19.333333...

    @param xs: an iterable yielding numbers.
    @return: the variance of the numbers provided by the iterable.

    @see: RunningMean() if you also need the mean
    )r   r=   ro   r   r   r   r=   z  s    r=   )T)rt   T)Nr}   r~   )r   T)r&   r5   __all__r   r   r   r   r   r   r	   r   r<   r=   r   r   r   r   <module>   s   V B 	

"
9
8