import numpy as np, matplotlib.pyplot as plt

def mean(x):
    """Return the arithmetic mean of the elements of ``x``.

    ``x`` must be sized and summable (a list, tuple or 1-D NumPy array).
    An empty ``x`` raises ``ZeroDivisionError``.
    """
    total = sum(x)
    count = len(x)
    return total / count

def var(x):
    """Return the population variance of ``x`` (divides by ``len(x)``).

    ``x`` must support element-wise subtraction of a scalar and
    element-wise powers, e.g. a NumPy array.
    """
    centre = mean(x)
    squared_dev = (x - centre) ** 2
    return sum(squared_dev) / len(x)

def cov(x, y):
    """Return the population covariance of ``x`` and ``y``.

    Computed as E[xy] - E[x]E[y]; ``x * y`` must be an element-wise
    product, so both arguments are expected to be NumPy arrays.
    """
    mean_of_product = mean(x * y)
    product_of_means = mean(x) * mean(y)
    return mean_of_product - product_of_means

def corr(x, y):
    """Return the correlation coefficient of ``x`` and ``y``.

    This is the covariance divided by the square root of the product of
    the two population variances.
    """
    covariance = cov(x, y)
    scale = np.sqrt(var(x) * var(y))
    return covariance / scale

def avvik(coeff, x_line, y_data):
    """Return the deviations of ``y_data`` from a straight line.

    The line is ``coeff[0] * x_line + coeff[1]`` (slope first, then
    intercept); the result is ``y_data`` minus that line.
    """
    slope, intercept = coeff[0], coeff[1]
    return y_data - (slope * x_line + intercept)

def r_squared(y, pred_y):
    """Return the coefficient of determination R^2 = 1 - SS_res / SS_tot.

    Parameters
    ----------
    y : sequence of numbers
        Observed values (list, tuple or NumPy array).
    pred_y : sequence of numbers
        Predicted values, same length as ``y``.

    Returns
    -------
    float
        1.0 for a perfect fit, 0.0 when the prediction is no better than
        the mean of ``y``; may be negative for worse predictions.

    Notes
    -----
    If every element of ``y`` is identical, SS_tot is zero and the
    division yields ``nan`` or ``-inf`` (NumPy floating-point semantics).
    """
    y = np.asarray(y, dtype=float)
    pred_y = np.asarray(pred_y, dtype=float)

    # Total sum of squares: spread of the observations around their mean.
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    # Residual sum of squares: what the prediction fails to explain.
    ss_res = np.sum((pred_y - y) ** 2)

    return 1 - ss_res / ss_tot

def dev_intervall(y, pred_y, a, b):
    """Count the deviations ``y - pred_y`` that lie in the closed interval [a, b].

    ``y - pred_y`` must produce an iterable of scalars (e.g. two 1-D
    NumPy arrays). Both end points are inclusive.
    """
    residuals = y - pred_y
    return sum(1 for d in residuals if a <= d and b >= d)

def dev_list(y, pred_y, n=8):
    """Histogram the deviations ``y - pred_y`` into ``n - 1`` equal-width bins.

    Parameters
    ----------
    y, pred_y : sequence of numbers
        Observed and predicted values of equal length.
    n : int, optional
        Number of bin *edges*, spaced evenly from the smallest to the
        largest deviation; this gives ``n - 1`` bins.

    Returns
    -------
    avvik : numpy.ndarray
        Centre of each bin (length ``n - 1``).
    antall : numpy.ndarray
        Number of deviations in each bin (length ``n - 1``). Bins are
        half-open ``[left, right)`` except the last, which also includes
        its right edge, so every deviation is counted exactly once.

    Raises
    ------
    ValueError
        If there are no deviations (empty input) and ``n >= 2``.
    """
    dev = np.asarray(y, dtype=float) - np.asarray(pred_y, dtype=float)

    # Fewer than two edges means there are no bins at all.
    if n < 2:
        return np.array([]), np.array([], dtype=int)

    edges = np.linspace(dev.min(), dev.max(), n)
    antall, _ = np.histogram(dev, bins=edges)

    # n edges span n - 1 bins, so each centre is the midpoint of two
    # neighbouring edges (not left edge + range / (2 * n)).
    centres = (edges[:-1] + edges[1:]) / 2

    return centres, antall

# Values of the independent variable, parsed from a whitespace-separated string.
t = np.array(list(map(
    float,
    '3.45   4.583  5.5    6.4    7.5    8.412  9.5   12.467 15.467 18.533 23.5   28.55'.split(),
)))

# Three data series, each with one value per entry of t.
y_0 = np.array([
    0.41233331, 0.36018618, 0.34651201,
    0.32500858, 0.32149358, 0.31455768,
    0.29613963, 0.28354908, 0.26475105,
    0.24861761, 0.24368566, 0.21113227,
])

y_1 = np.array([
    0.91364065, 1.04762865, 1.0860113,
    1.14957231, 1.16036375, 1.18201095,
    1.2419146, 1.28506447, 1.35321773,
    1.41571218, 1.435633, 1.57826103,
])

y_2 = np.array([
    2.42522245, 2.77634194, 2.88590285,
    3.07684187, 3.110482, 3.1790672,
    3.37678551, 3.52672627, 3.77713323,
    4.0222413, 4.10364725, 4.7363674,
])

# Series and their display labels, in matching order
# (Norwegian for "Zeroth", "First", "Second").
y_data = [y_0, y_1, y_2]
names = ["Nullte", "Første", 'Andre']

# Ten evenly spaced points on [0, 10] and an all-zero array of the same
# shape; neither is referenced in the code visible in this file.
x = np.linspace(0, 10, 10)
y = np.zeros_like(x)

# For every data series: fit a straight line against t, print the
# correlation and R^2 of the fit, then show the fit and the standardized
# residuals in two separate figures.
for i, series in enumerate(y_data):
    # Degree-1 least-squares fit; coeff[0] is the slope, coeff[1] the intercept.
    coeff = np.polyfit(t, series, 1)
    pred_y = coeff[0]*t + coeff[1]

    dev = avvik(coeff, t, series)

    # Disabled experiment: compare the histogram of deviations with a
    # Gaussian-shaped curve.
    #dev_size, antall = dev_list(y_data[i], pred_y)
    #normal_antall = np.exp(-(np.linspace(min(dev_size), max(dev_size), len(antall))) ** 2)
    #corr_norm = corr(antall, normal_antall)

    print(names[i])
    print('corr : ', corr(series, t))
    print('r^2 : ', r_squared(series, pred_y))
    #print('corr_norm : ', corr_norm)
    print()

    #plt.scatter(dev_size, antall)
    #plt.xlim(1.1*min(dev_size),1.1*max(dev_size))
    #plt.show()

    # Figure 1: data points with the fitted line.
    plt.scatter(t, series)
    plt.plot(t, pred_y)
    # NOTE(review): the y-limits follow the fitted line only, so data points
    # above or below the line's end values are cut off — confirm this is intended.
    plt.ylim(min(pred_y), max(pred_y))
    plt.show()

    # Figure 2: residuals scaled by their population standard deviation.
    # Computed once and reused for the curve and both axis limits.
    std_dev = dev / np.sqrt(var(dev))
    # NOTE(review): a label is set but no legend is drawn, so it is never displayed.
    plt.plot(t, std_dev, label=names[i])
    plt.ylim(1.1*min(std_dev), 1.1*max(std_dev))
    plt.show()