import numpy as np, matplotlib.pyplot as plt, random as ran

def mean(x):
    """Return the arithmetic mean of the items in *x* (their sum over their count)."""
    total = sum(x)
    return total / len(x)

def var(x):
    """Return the population variance of *x* (divides by ``len(x)``).

    *x* must support ``x - scalar`` and ``** 2`` element-wise, e.g. a numpy array.
    """
    centred = x - mean(x)
    return sum(centred ** 2) / len(x)

def cov(x, y):
    """Return the population covariance of *x* and *y*: mean(x*y) - mean(x)*mean(y).

    ``x * y`` must be an element-wise product, e.g. two numpy arrays.
    """
    mean_of_product = mean(x * y)
    product_of_means = mean(x) * mean(y)
    return mean_of_product - product_of_means

def corr(x, y):
    """Return the correlation coefficient cov(x, y) / sqrt(var(x) * var(y))."""
    numerator = cov(x, y)
    denominator = np.sqrt(var(x) * var(y))
    return numerator / denominator

def dev_intervall(y, pred_y, a, b):
    """Count the residuals ``y - pred_y`` that lie in the closed interval [a, b].

    Both end points are inclusive. The residuals are iterated element by
    element, so ``y - pred_y`` must yield a one-dimensional sequence.
    """
    residuals = y - pred_y
    return sum(1 for r in residuals if a <= r <= b)

def dev_list(y, pred_y, n=20):
    """Histogram the residuals ``y - pred_y`` into ``n - 1`` equal-width bins.

    Parameters:
        y: observed values (array-like).
        pred_y: predicted values, same shape as *y*.
        n: number of bin edges spanning [min residual, max residual];
            this yields ``n - 1`` bins.

    Returns:
        ``(avvik, antall)``: the bin centres and the number of residuals in
        each bin, both arrays of length ``n - 1``.
    """
    dev = np.asarray(y) - np.asarray(pred_y)
    edges = np.linspace(dev.min(), dev.max(), n)

    # np.histogram uses half-open bins [a, b) except for the last one, which is
    # closed, so a residual lying exactly on an interior edge is counted once
    # and the counts always add up to the number of residuals.
    antall, _ = np.histogram(dev, bins=edges)

    # The centre of a bin is the midpoint of its two edges (half of the actual
    # bin width (max - min) / (n - 1) to the right of the left edge).
    avvik = (edges[:-1] + edges[1:]) / 2

    return avvik, antall

def r_squared(y, pred_y):
    """Return the explained sum of squares divided by the total sum of squares.

    Both sums are taken around the mean of the observed values *y*.
    NOTE(review): this ratio matches 1 - SS_res/SS_tot only for a
    least-squares fit that includes an intercept.
    """
    y_bar = mean(y)
    total_ss = sum((y - y_bar) ** 2)
    explained_ss = sum((pred_y - y_bar) ** 2)
    return explained_ss / total_ss

def noise(data, deg):
    """Add a randomly permuted, odd-symmetric exponential profile to *data*.

    The profile is ``sign(x) * exp(-|x|)`` sampled at ``len(data)`` evenly
    spaced points on [-deg, deg]. Each sample is used exactly once, assigned
    to a random position.

    Parameters:
        data: mutable sequence of numbers (list or numpy array).
        deg: half-width of the sampling interval.

    Returns:
        The same *data* object, modified in place.
    """
    x_line = np.linspace(-deg, deg, len(data))
    # np.sign rather than x / |x|: the quotient is 0/0 = NaN whenever the grid
    # contains an exact zero (e.g. odd-length data), which used to inject NaN
    # into the output. np.sign gives 0 there; elsewhere the values are the same.
    noise_list = list(np.sign(x_line) * np.exp(-np.abs(x_line)))

    # Draw without replacement. The pop/randint order is kept so that seeded
    # runs give the same result as before for inputs that already worked.
    for i in range(len(data)):
        data[i] += noise_list.pop(ran.randint(0, len(noise_list) - 1))
    return data

def gen_noise_list(deg, varians, dim):
    """Draw *dim* random values from a discretised bell-shaped distribution.

    A grid of ``(2*deg + 1) * 10`` points on [-deg, deg] is built, and each
    grid value is repeated ``int((100/std) * exp(-(value/std)**2))`` times in
    a pool, with ``std = sqrt(varians)``. The result is *dim* uniform draws
    (with replacement) from that pool, returned as a numpy array.
    """
    std = np.sqrt(varians)
    grid = np.linspace(-deg, deg, (2 * deg + 1) * 10)

    # Repeating a value more often makes it proportionally more likely to be drawn.
    pool = []
    for value in grid:
        repeats = int((100 / std) * np.exp(-(value / std) ** 2))
        pool += [value] * repeats

    draws = [pool[ran.randint(0, len(pool) - 1)] for _ in range(dim)]
    return np.array(draws)

def pos_neg_mean_dev(y, pred_y):
    """Return ``(mean of residuals >= 0, mean of residuals <= 0)``.

    Residuals are ``y - pred_y``. A residual of exactly zero is included in
    both groups. A group without members contributes a mean of 0.
    """
    residuals = y - pred_y

    non_positive = [r for r in residuals if r <= 0]
    non_negative = [r for r in residuals if r >= 0]

    # Fall back to a single zero so that the mean of an empty group is 0.
    return mean(non_negative or [0]), mean(non_positive or [0])


def plot_data():
    """Fit a straight line to every series in ``y_data`` and visualise the fit.

    Works on the module-level ``x_line``, ``y_data``, ``colors`` and ``names``.
    For each series, three figures are shown in turn (data with the fitted
    line, residual histogram next to a bell-shaped reference curve, residuals
    against ``x_line``), and a short text summary is printed.
    """
    # All degree-1 fits are computed before the first figure is opened.
    fits = [np.polyfit(x_line, series, 1) for series in y_data]

    for idx, series in enumerate(y_data):
        slope, intercept = fits[idx][0], fits[idx][1]

        # Figure 1: raw data together with the fitted line.
        plt.scatter(x_line, series, color=colors[idx], label=names[idx])
        plt.plot(x_line, slope * x_line + intercept, color=colors[idx])
        plt.show()

        pred_y = slope * x_line + intercept
        residuals = series - pred_y

        # Figure 2: residual histogram versus max(count) * exp(-c**2), where c
        # runs evenly from the smallest to the largest bin centre.
        avvik, antall = dev_list(series, pred_y)
        centre_grid = np.linspace(min(avvik), max(avvik), len(antall))
        normal_antall = max(antall) * np.exp(-centre_grid ** 2)
        corr_norm = corr(antall, normal_antall)

        plt.scatter(avvik, antall)
        plt.scatter(avvik, normal_antall)
        plt.show()

        # Figure 3: residuals against x.
        plt.scatter(x_line, residuals, label=names[idx])
        plt.show()

        print(names[idx])
        print('std:  ', np.sqrt(var(residuals)))
        print('corr: ', corr(pred_y, series))
        print('r^2:  ', r_squared(series, pred_y))
        print('corr_normalfordeling: ', corr_norm)
        print()

# Plot colours and labels, looked up by series index in plot_data().
colors = ['b', 'r', 'g']
names = ['lin', 'sqr', 'cub']

# Common x grid for all series.
x_line = np.linspace(0, 10, 1000)

# Noise-free model curves: linear, linear + quadratic term, linear + cubic term.
y_lin = x_line.copy()
y_sqr = 0.05 * x_line**2 + x_line
y_cub = 0.005 * x_line**3 + x_line

# Only the linear and quadratic series get noise and are analysed;
# y_cub is defined but not included in y_data.
y_lin += gen_noise_list(5, 1, len(y_lin))
y_sqr += gen_noise_list(5, 1, len(y_sqr))

y_data = [y_lin, y_sqr]

plot_data()
# plt.legend() is left disabled, so the labels set in plot_data() are not drawn.
plt.show()