a
    a9                     @   s   d Z g dZddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZ eg d	dd
fddZd#ddZeg d	dfddZd$ddZg d	dfddZg d	ddfddZd%ddZd&ddZd'dd Zd(d!d"ZdS ))zB
Additional statistics functions with support for masked arrays.

)
compare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ci    N)float_int_ndarray)MaskedArray   )mstats_basic)normbetatbinom)g      ?      ?g      ?Fc                 C   s   dd }t j| dtd} tj|ddd}|du s:| jdkrH|| ||}n*| jdkr`td	| j t ||| ||}t j|dd
S )a  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    c                 S   sJ  t t |  t}|j}t dt|ft	}|dk rTt j
|_|rL|S |d S t |d t| }tj}t|D ]t\}}	|||d |	 |d d|	  }
|
dd |
dd  }t ||}||d|f< t ||| d |d|f< qx|d |d|dkf< |d |d|dkf< |rBt j
 |d|dkf< |d|dkf< |S |d S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednZhdvbetacdfip_wwZhd_mean r3   i/Users/vegardjervell/Documents/master/model/venv/lib/python3.9/site-packages/scipy/stats/mstats_extras.py_hd_1D;   s,     "zhdquantiles.<locals>._hd_1DFcopydtyper   r7   ZndminNr   DArray 'data' must be at most two dimensional, but got data.ndim = %dr7   )maarrayr   r   ndim
ValueErrorapply_along_axisfix_invalid)r(   r)   axisr*   r5   r0   resultr3   r3   r4   r      s    
r   r   c                 C   s   t | dg||d}| S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    r   )rB   r*   )r   r   )r(   rB   r*   rC   r3   r3   r4   r   g   s    r   c                 C   sv   dd }t j| dtd} tj|ddd}|du r<|| |}n(| jdkrTtd	| j t ||| |}t j|dd
 S )a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c                    s  t |  t}t t|t}|dk r6t j|_t |t	|d  }t
j}t|D ]\}}|||d | |d d|  }|dd |dd   t j fddt|D td}	t j|	 ddd	| t	|d  }
t	|d t t |
 t	|  ||< qZ|S )
z%Computes the std error for 1D arrays.r   r   Nr   c                    s@   g | ]8} d | d |   |d  |d d    qS )Nr   r3   ).0kr2   r+   r3   r4   
<listcomp>   s   z4hdquantiles_sd.<locals>._hdsd_1D.<locals>.<listcomp>r8   Fr9   )r   r   r   r    r   r   r!   r"   r#   r$   r   r%   r&   Zfromiterranger=   r*   sqrtZdiagZdiagonal)r(   r)   r,   Zhdsdvvr.   r/   r0   r1   Zmx_Zmx_varr3   rF   r4   _hdsd_1D   s$     $.z hdquantiles_sd.<locals>._hdsd_1DFr6   r   r9   Nr   r:   r;   )	r<   r=   r   r   r>   r?   r@   rA   Zravel)r(   r)   rB   rL   r0   rC   r3   r3   r4   r      s    
r   皙?rN   TT皙?c           
      C   s|   t j| dd} tj| |||d}||}tj| |||d}||d }td|d  |}	t	||	|  ||	|  fS )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    Fr;   )limits	inclusiverB   r          @)
r<   r=   mstatsZtrimrmeanZtrimmed_stdecountr   ppfr   )
r(   rQ   rR   alpharB   ZtrimmedZtmeanZtstdeZdfZtppfr3   r3   r4   r
      s    *
r
   c                 C   sd   dd }t j| dd} | jdkr.td| j tj|ddd}|d	u rP|| |S t ||| |S d	S )
a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c                 S   s   t |  } | j}t || d t}tj}t 	t
|t}t jd|d td| }|d|  }t|D ]b\}}	|||	d ||	 |||	d ||	  }
t |
| }t |
| d }t ||d  ||< qn|S )Nr   r   rH   g      ?r   )r   r   r   r   r=   Zastyper   r   r%   r   r    r   r#   r&   r'   rJ   )r(   r0   r,   r)   r.   Zmjxyr/   mWZC1ZC2r3   r3   r4   _mjci_1D   s    (zmjci.<locals>._mjci_1DFr;   r   r:   r   r9   N)r<   r=   r>   r?   r   r@   )r(   r)   rB   r]   r0   r3   r3   r4   r      s    

r   c                 C   sZ   t |d| }td|d  }tj| |dd|d}t| ||d}|||  |||  fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   rS   r   )ZalphapZbetaprB   rB   )minr   rW   rT   Z
mquantilesr   )r(   r)   rX   rB   zZxqZsmjr3   r3   r4   r     s
    r   c                 C   sV   dd }t j| dd} |du r*|| |}n(| jdkrBtd| j t ||| |}|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c           	      S   s>  t |  } t| }t|d| }tt|d |d}t|| |dt|d |d }|d| k r|d8 }t|| |dt|d |d }t|| d |dt||d }|d | ||  }|| | t	||d|  |   }|| |  d| | |d    || || d   d| | ||    f}|S )Nr   rS   r   r   )
r   r   r   r    r_   intr   Z_ppfr%   r$   )	r(   rX   r,   rE   ZgkZgkkIlambdZlimsr3   r3   r4   _cihs_1DV  s    $$$$&zmedian_cihs.<locals>._cihs_1DFr;   Nr   r:   )r<   r=   r>   r?   r@   )r(   rX   rB   rd   rC   r3   r3   r4   r   ?  s    
r   c                 C   sn   t j| |dt j||d }}tj| |dtj||d }}t|| t |d |d   }dt| S )a+  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    r^   r   r   )	r<   ZmedianrT   Zstde_medianr   absrJ   r   r%   )Zgroup_1Zgroup_2rB   Zmed_1Zmed_2Zstd_1Zstd_2r\   r3   r3   r4   r   r  s    $r   c                 C   s>   dd }t j| |dt} |du r,|| S t ||| S dS )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c                 S   s   |   }t|}|dk r$tjtjgS t|d d d\}}t|}d| ||d   |||   }|| }d| ||  |||d    }||gS )N   g      @g?r   )r   r    r   r!   divmodra   )r(   rY   r,   jhZqlorE   Zqupr3   r3   r4   _idf  s      zidealfourths.<locals>._idfr^   N)r<   r   r   r   r@   )r(   rB   rj   r3   r3   r4   r     s
    r   c                 C   s   t j| dd} |du r| }ntj|ddd}| jdkr>td|  }t| dd}d|d	 |d
   |d  }| dddf |dddf | kd
}| dddf |dddf | k d
}|| d| |  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Fr;   Nr   r9   z#The input array should be 1D only !r^   g333333?r   r   rN   rS   )r<   r=   r   r>   AttributeErrorrV   r   sum)r(   Zpointsr,   rri   ZnhiZnlor3   r3   r4   r	     s    
**r	   )r   F)rM   rO   rP   N)rP   N)N)N)N)__doc____all__Znumpyr   r   r   r   Znumpy.mar<   r    r   rT   Zscipy.stats.distributionsr   r   r   r   listr   r   r   r
   r   r   r   r   r   r	   r3   r3   r3   r4   <module>   s&   K
<  
3-"
3
!
(