a
    ag                     @   s  d dl Z d dlZd dlZd dlZd dlZg dZe dZe dZ	e dZ
e dZe dZe dZe d	Ze d
Ze dZe dZe dZG dd deZG dd deZG dd dZG dd deZG dd deZG dd deZG dd deZG dd deZdd Zd d! Zd"d# Zd?d$d%Zd&d' Z d(d) Z!d*d+ Z"d,d- Z#d.d/ Z$G d0d1 d1Z%d2d3 Z&d4d5 Z'd6d7 Z(d8d9 Z)d:d; Z*d<e*_+e,d=krd dl-Z-e-j.d> Z/e*e/ dS )@    N)MetaDataloadarff	ArffErrorParseArffErrorz^\s*@z^%z^\s+$z^\s*@\S*z^@[Dd][Aa][Tt][Aa]z*^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)z2^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)z{(.+)}z%[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$z'(..+)'\s+(..+$)z(\S+)\s+(..+$)c                   @   s   e Zd ZdS )r   N__name__
__module____qualname__ r
   r
   f/Users/vegardjervell/Documents/master/model/venv/lib/python3.9/site-packages/scipy/io/arff/arffread.pyr   6   s   r   c                   @   s   e Zd ZdS )r   Nr   r
   r
   r
   r   r   :   s   r   c                   @   s4   e Zd ZdZdd Zedd Zdd Zdd	 ZdS )
	AttributeNc                 C   s   || _ d | _tj| _d S N)namerangenpobject_dtypeselfr   r
   r
   r   __init__E   s    zAttribute.__init__c                 C   s   dS )zj
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.
        Nr
   clsr   attr_stringr
   r
   r   parse_attributeJ   s    zAttribute.parse_attributec                 C   s   dS )-
        Parse a value of this type.
        Nr
   r   data_strr
   r
   r   
parse_dataR   s    zAttribute.parse_datac                 C   s   | j d | j S )r   ,)r   	type_namer   r
   r
   r   __str__X   s    zAttribute.__str__)	r   r   r	   r   r   classmethodr   r   r!   r
   r
   r
   r   r   A   s   
r   c                       sH   e Zd ZdZ fddZedd Zedd Zdd	 Z	d
d Z
  ZS )NominalAttributeZnominalc                    s6   t  | || _|| _tjtdd |D f| _d S )Nc                 s   s   | ]}t |V  qd S r   )len.0ir
   r
   r   	<genexpr>g       z,NominalAttribute.__init__.<locals>.<genexpr>)superr   valuesr   r   Zstring_maxr   )r   r   r+   	__class__r
   r   r   c   s    zNominalAttribute.__init__c                 C   s4   t | }|r(t|d\}}t|S tddS )a  Given a string containing a nominal type, returns a tuple of the
        possible values.

        A nominal type is defined as something framed between braces ({}).

        Parameters
        ----------
        atrv : str
           Nominal type definition

        Returns
        -------
        poss_vals : tuple
           possible values

        Examples
        --------
        >>> get_nom_val("{floup, bouga, fl, ratata}")
        ('floup', 'bouga', 'fl', 'ratata')
           z(This does not look like a nominal stringN)	r_nominalmatchsplit_data_linegrouptuple
ValueError)atrvmattrs_r
   r
   r   _get_nom_vali   s
    
zNominalAttribute._get_nom_valc                 C   s(   |d dkr |  |}| ||S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For nominal attributes, the attribute string would be like '{<attr_1>,
         <attr2>, <attr_3>}'.
        r   {N)r:   )r   r   r   r+   r
   r
   r   r      s    	

z NominalAttribute.parse_attributec                 C   s8   || j v r|S |dkr|S tdt|t| j f dS )r   ?z%s value not in %sN)r+   r5   strr   r
   r
   r   r      s    

zNominalAttribute.parse_datac                 C   sN   | j d }tt| jd D ]}|| j| d 7 }q|| jd 7 }|d7 }|S )Nz,{r/   r   })r   r   r$   r+   r   msgr'   r
   r
   r   r!      s    
zNominalAttribute.__str__)r   r   r	   r   r   staticmethodr:   r"   r   r   r!   __classcell__r
   r
   r-   r   r#   _   s   

r#   c                       s8   e Zd Z fddZedd Zdd Zdd Z  ZS )	NumericAttributec                    s   t  | d| _tj| _d S )Nnumeric)r*   r   r   r   Zfloat_r   r   r-   r
   r   r      s    zNumericAttribute.__init__c                 C   sX   |   }|dtd dksH|dtd dksH|dtd dkrP| |S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For numeric attributes, the attribute string would be like
        'numeric' or 'int' or 'real'.
        NrE   intreallowerstripr$   r   r
   r
   r   r      s    
z NumericAttribute.parse_attributec                 C   s   d|v rt jS t|S dS )a  
        Parse a value of this type.

        Parameters
        ----------
        data_str : str
           string to convert

        Returns
        -------
        f : float
           where float can be nan

        Examples
        --------
        >>> atr = NumericAttribute('atr')
        >>> atr.parse_data('1')
        1.0
        >>> atr.parse_data('1\n')
        1.0
        >>> atr.parse_data('?\n')
        nan
        r<   N)r   nanfloatr   r
   r
   r   r      s    zNumericAttribute.parse_datac                 C   s<   |j d |j d  }t|t|t|t|| fS Ng      ?r/   sizer   ZnanminZnanmaxmeanstd)r   datanbfacr
   r
   r   _basic_stats   s    zNumericAttribute._basic_stats)	r   r   r	   r   r"   r   r   rT   rC   r
   r
   r-   r   rD      s
   
rD   c                       s(   e Zd Z fddZedd Z  ZS )StringAttributec                    s   t  | d| _d S )Nstring)r*   r   r   r   r-   r
   r   r      s    zStringAttribute.__init__c                 C   s0   |   }|dtd dkr(| |S dS dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For string attributes, the attribute string would be like
        'string'.
        NrV   rH   r   r
   r
   r   r      s    
zStringAttribute.parse_attribute)r   r   r	   r   r"   r   rC   r
   r
   r-   r   rU      s   rU   c                       sH   e Zd Z fddZedd Zedd Zdd Z fd	d
Z	  Z
S )DateAttributec                    s8   t  | || _|| _d| _|| _td| j| _d S )Ndater   )	r*   r   date_formatdatetime_unitr   r   r   
datetime64r   )r   r   rY   rZ   r-   r
   r   r     s    zDateAttribute.__init__c                 C   s   t | }|r|d }d }d|v r:|dd}d}n|dd}d}d|v rb|dd}d	}d
|v rz|d
d}d}d|v r|dd}d}d|v r|dd}d}d|v r|dd}d}d|v sd|v rtd|d u rtd||fS tdd S )Nr/   Zyyyyz%YYyyz%yZMMz%mMddz%dDZHHz%Hhmmz%Mr7   ssz%SszZz6Date type attributes with time zone not supported, yetz"Invalid or unsupported date formatzInvalid or no date format)r_dater1   r3   rJ   replacer5   )r6   r7   patternrZ   r
   r
   r   _get_date_format	  s<    
zDateAttribute._get_date_formatc                 C   sB   |   }|dtd dkr:| |\}}| |||S dS dS )
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For date attributes, the attribute string would be like
        'date <format>'.
        NrX   )rI   rJ   r$   rj   )r   r   r   attr_string_lowerrY   rZ   r
   r
   r   r   0  s
    
zDateAttribute.parse_attributec                 C   sT   |   d d}|dkr*td| jS tj|| j}t|d| j S dS )r   '"r<   ZNaTzdatetime64[%s]N)rJ   r   r[   rZ   datetimestrptimerY   Zastype)r   r   Zdate_strdtr
   r
   r   r   B  s    
zDateAttribute.parse_datac                    s   t   d | j S )Nr   )r*   r!   rY   r    r-   r
   r   r!   N  s    zDateAttribute.__str__)r   r   r	   r   rB   rj   r"   r   r   r!   rC   r
   r
   r-   r   rW      s   
&
rW   c                       s<   e Zd Z fddZedd Zdd Z fddZ  ZS )	RelationalAttributec                    s*   t  | d| _tj| _g | _d | _d S )N
relational)r*   r   r   r   r   r   
attributesdialectr   r-   r
   r   r   T  s
    zRelationalAttribute.__init__c                 C   s0   |   }|dtd dkr(| |S dS dS )rk   Nrs   rH   )r   r   r   rl   r
   r
   r   r   [  s    
z#RelationalAttribute.parse_attributec                    s|   t ttj}| d}g }|dD ]4}t|j\ _|	t
 fdd|D  q.t|dd jD S )Nzunicode-escape
c                    s    g | ]}j |  | qS r
   )rt   r   r%   rowr   r
   r   
<listcomp>x  r)   z2RelationalAttribute.parse_data.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r
   r   r   r&   ar
   r
   r   ry   {  r)   )listr   r$   rt   encodedecodesplitr2   ru   appendr4   r   array)r   r   elemsescaped_stringZ
row_tuplesrawr
   rw   r   r   l  s    zRelationalAttribute.parse_datac                    s$   t   d ddd | jD  S )Nz
	c                 s   s   | ]}t |V  qd S r   )r=   r{   r
   r
   r   r(     r)   z.RelationalAttribute.__str__.<locals>.<genexpr>)r*   r!   joinrt   r    r-   r
   r   r!   }  s    zRelationalAttribute.__str__)	r   r   r	   r   r"   r   r   r!   rC   r
   r
   r-   r   rr   R  s
   
rr   c                 C   sD   t ttttf}|D ] }|| |}|d ur|  S qtd| d S )Nzunknown attribute %s)r#   rD   rW   rU   rr   r   r   )r   r   Zattr_classesr   attrr
   r
   r   to_attribute  s    
r   c                  C   s8   t tdd} | du r4t d}|jdkt_tj} | S )zL
    Checks if the bug https://bugs.python.org/issue30157 is unpatched.
    has_bugNz3, 'a'rm   )getattrcsv_sniffer_has_bug_last_fieldcsvSniffersniff	quotecharr   )r   ru   r
   r
   r   r     s    r   c                 C   s   t  rd}dd|dfD ]*}t|tjtjB }|| }|r qBq||krNdS |j}t|dksdJ |d }|d d }	||	 }
|d	 d }	||	 }|d
 d }	t||	 }tdt	||
d tj}t|
| }|
|_||v r||_||_||_dS )zT
    Workaround for the bug https://bugs.python.org/issue30157 if is unpatched.
    zG(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)zI(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)zG(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)z-(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)Nr/   r   quotedelimspacez]((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$))r   r   )r   recompileDOTALL	MULTILINEfindall
groupindexr$   boolescapesearchr   	delimiterdoublequoteskipinitialspace)
sniff_lineru   
delimitersZright_regexrestrregexpmatchesr   r7   nr   r   r   	dq_regexpr   r
   r
   r   %workaround_csv_sniffer_bug_last_field  sD    
r   c                    s   d}t ttdjd   d dkr6 d d      }t fdd|D s`|d7 }|d u rt  j	||d}t
|||d	 tt  g|}||fS )
Nz,	r>      rv   c                 3   s   | ]}| v V  qd S r   r
   )r&   dliner
   r   r(     r)   z"split_data_line.<locals>.<genexpr>r   )r   )r   ru   r   )r   field_size_limitrF   ctypesc_ulongvaluerJ   anyr   r   r   nextreader)r   ru   r   r   rx   r
   r   r   r2     s     r2   c                 C   s   |  }t|}|rj|d}t|r@t|\}}t| }qvt|r`t|\}}t| }qvt	dnt	d| t
||}| dkrt| ||}||fS )ak  Parse a raw string in header (e.g., starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
       the attribute string.

    Returns
    -------
    name : str
       name of the attribute
    value : str
       value of the attribute
    next : str
       next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    r/   zmulti line not supported yetzFirst line unparsable: %srs   )rJ   r_attributer1   r3   r_comattrvaltokenize_single_commar   r_wcomattrvaltokenize_single_wcommar5   r   rI   read_relational_attribute)iterable	attributeZsattrZmattrr6   r   typeZ	next_itemr
   r
   r   tokenize_attribute  s     )







r   c              
   C   st   t | }|r`z |d }|d }W ql ty\ } ztd|W Y d }~qld }~0 0 ntd|  ||fS Nr/   r   z Error while tokenizing attributez Error while tokenizing single %s)r   r1   r3   rJ   
IndexErrorr5   valr7   r   r   er
   r
   r   r   :  s    
"r   c              
   C   st   t | }|r`z |d }|d }W ql ty\ } ztd|W Y d }~qld }~0 0 ntd|  ||fS r   )r   r1   r3   rJ   r   r5   r   r
   r
   r   r   I  s    
"r   c                 C   sz   t d|j d }||snt|}|rdt|}|rVt| |\}}|j| qlt	d| qt
| }qt
| }|S )z4Read the nested attributes of a relational attributez^@[Ee][Nn][Dd]\s*z\s*$Error parsing line %s)r   r   r   r1   r_headerliner   r   rt   r   r5   r   )ofileZrelational_attributer'   Zr_end_relationalr7   isattrr   r
   r
   r   r   X  s     



r   c                 C   s   t | }t|rt | }qd}g }t|st|}|rt|}|rdt| |\}}|| qt|}|r~|	d}nt
d| t | }q$t | }q$||fS )z&Read the header of the iterable ofile.Nr/   r   )r   	r_commentr1   
r_datametar   r   r   r   
r_relationr3   r5   )r   r'   Zrelationrt   r7   r   r   Zisrelr
   r
   r   read_headern  s&    







r   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )r   a  Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Methods
    -------
    names
    types

    Notes
    -----
    Also maintains the list of attributes in order, i.e., doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    c                 C   s   || _ dd |D | _d S )Nc                 S   s   i | ]}|j |qS r
   )r   r{   r
   r
   r   
<dictcomp>  r)   z%MetaData.__init__.<locals>.<dictcomp>)r   _attributes)r   relr   r
   r
   r   r     s    zMetaData.__init__c                 C   sf   d}|d| j  7 }| jD ]H}|d|| j| jf 7 }| j| jrX|dt| j| j 7 }|d7 }q|S )N zDataset: %s
z	%s's type is %sz, range is %srv   )r   r   r   r   r=   r@   r
   r
   r   __repr__  s    

zMetaData.__repr__c                 C   s
   t | jS r   )iterr   r    r
   r
   r   __iter__  s    zMetaData.__iter__c                 C   s   | j | }|j|jfS r   )r   r   r   )r   keyr   r
   r
   r   __getitem__  s    
zMetaData.__getitem__c                 C   s
   t | jS )zReturn the list of attribute names.

        Returns
        -------
        attrnames : list of str
            The attribute names.
        )r}   r   r    r
   r
   r   names  s    zMetaData.namesc                    s    fdd j D }|S )zReturn the list of attribute types.

        Returns
        -------
        attr_types : list of str
            The attribute types.
        c                    s   g | ]} j | jqS r
   )r   r   )r&   r   r    r
   r   ry     s   z"MetaData.types.<locals>.<listcomp>)r   )r   Z
attr_typesr
   r    r   types  s    
zMetaData.typesN)
r   r   r	   __doc__r   r   r   r   r   r   r
   r
   r
   r   r     s   

r   c                 C   sN   t | dr| }n
t| d}zt|W || ur4|  S n|| urH|  0 dS )a  
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of NumPy arrays. For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc.

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes. It cannot read
    files with sparse data ({} in the file). However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from io import StringIO
    >>> content = """
    ... @relation foo
    ... @attribute width  numeric
    ... @attribute height numeric
    ... @attribute color  {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... """
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    	width's type is numeric
    	height's type is numeric
    	color's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    readrtN)hasattropen	_loadarffclose)fr   r
   r
   r   r     s    F

 r   c           	   
      s   zt | \} W n: tyJ } z"dt| }t||W Y d }~n
d }~0 0 d} D ]}t|trTd}qTt| }|r~tdt d
 fdd	}t	|| }t
|dd	  D }||fS )Nz'Error while parsing header, error was: FTz*String attributes not supported yet, sorryr   c                 3   s\   t t}d }| D ]B}t|st|r.qt||\ }t fdd|D V  qd S )Nc                    s   g | ]} |  | qS r
   )r   r%   )r   rx   r
   r   ry   a  r)   z0_loadarff.<locals>.generator.<locals>.<listcomp>)r}   r   r   r1   r_emptyr2   r4   )Zrow_iterr   r   ru   r   r   ni)rx   r   	generatorG  s    z_loadarff.<locals>.generatorc                 S   s   g | ]}|j |jfqS r
   rz   r{   r
   r
   r   ry   e  r)   z_loadarff.<locals>.<listcomp>)r   )r   r5   r=   r   
isinstancerU   r   NotImplementedErrorr$   r}   r   r   )	r   r   r   rA   hasstrr|   metar   rR   r
   r   r   r   (  s"     


r   c                 C   s<   | j d | j d  }t| t| t| t| | fS rM   rN   )rR   rS   r
   r
   r   basic_statsl  s    r   c                 C   sX   |j }|dks|dks|dkrHt|\}}}}td| |||||f  ntt| d S )NrE   rG   integerz%s,%s,%f,%f,%f,%f)r   r   printr=   )r   tprR   r   minr,   rP   rQ   r
   r
   r   print_attributeq  s
    r   c                 C   sF   t | \}}tt|j t|j |D ]}t||| ||  q(d S r   )r   r   r$   r   rO   r   )filenamerR   r   r'   r
   r
   r   	test_wekaz  s
    
r   F__main__r/   )N)0r   ro   Znumpyr   r   r   __all__r   Zr_metar   r   r   r   r   r   r0   rg   r   r   IOErrorr   r   r   r#   rD   rU   rW   rr   r   r   r   r2   r   r   r   r   r   r   r   r   r   r   r   Z__test__r   sysargvr   r
   r
   r
   r   <module>   sX   










K=S32
#DJQD		

