import numpy as np, matplotlib.pyplot as plt

# Sample grid: ten evenly spaced x values from 0 to 10 (endpoint included).
x_line = np.linspace(0, 10, 10)

# One y-series per entry of `names`, each evaluated on x_line.
y_lin = list(x_line)  # y = x, kept as a plain list of the grid values
y_sqr = x_line**2 + x_line
# NOTE(review): labelled 'cub' but the expression is quadratic (0.8*x^2 + x);
# confirm whether a cubic term was intended.
y_cub = 0.8 * x_line**2 + x_line
y_data = [y_lin, y_sqr, y_cub]

# Plot colour and legend label for each series, index-aligned with y_data.
colors = ['b', 'r', 'g']
names = ['lin', 'sqr', 'cub']

def get_u_x(coeff, y):
    """Return the x at which the line ``coeff[0] * x + coeff[1]`` equals ``y``.

    Args:
        coeff: Indexable pair; ``coeff[0]`` is the slope and ``coeff[1]`` the
            intercept of the line.
        y: Value (scalar or array) to map back onto the x axis.

    Returns:
        ``(y - coeff[1]) / coeff[0]``. No check is made for a zero slope.
    """
    intercept = coeff[1]
    slope = coeff[0]
    shifted = y - intercept
    return shifted / slope

def get_u_y(coeff, x):
    """Evaluate the line ``coeff[0] * x + coeff[1]`` at ``x``.

    Args:
        coeff: Indexable pair; ``coeff[0]`` is the slope and ``coeff[1]`` the
            intercept of the line.
        x: Point (scalar or array) at which to evaluate the line.

    Returns:
        The line's y value at ``x``.
    """
    slope = coeff[0]
    scaled = slope * x
    intercept = coeff[1]
    return scaled + intercept

def get_std_x(coeff, x_data, y_data):
    """Root-mean-square horizontal distance between the points and the line.

    For every ``(x, y)`` pair the x value that the line ``coeff`` assigns to
    ``y`` (via ``get_u_x``) is subtracted from the observed ``x``; the squared
    differences are summed, divided by ``len(x_data)`` and square-rooted.
    The spread is therefore measured around the line, not around the mean
    of ``x_data``.

    Args:
        coeff: Line coefficients, ``[slope, intercept]``.
        x_data: Observed x values.
        y_data: Observed y values, paired positionally with ``x_data``.

    Returns:
        The RMS x-residual.

    Note:
        The divisor is ``len(x_data)`` while pairing stops at the shorter of
        the two sequences, so the inputs are expected to have equal length.
    """
    count = len(x_data)
    squared_total = 0
    for x_obs, y_obs in zip(x_data, y_data):
        residual = x_obs - get_u_x(coeff, y_obs)
        squared_total += residual ** 2
    mean_square = squared_total / count
    return np.sqrt(mean_square)

def get_std_y(coeff, x_data, y_data):
    """Root-mean-square vertical distance between the points and the line.

    For every ``(x, y)`` pair the line's prediction at ``x`` (via
    ``get_u_y``) is subtracted from the observed ``y``; the squared
    differences are summed, divided by ``len(x_data)`` and square-rooted.
    The spread is therefore measured around the line, not around the mean
    of ``y_data``.

    Args:
        coeff: Line coefficients, ``[slope, intercept]``.
        x_data: Observed x values.
        y_data: Observed y values, paired positionally with ``x_data``.

    Returns:
        The RMS y-residual.

    Note:
        The divisor is ``len(x_data)`` while pairing stops at the shorter of
        the two sequences, so the inputs are expected to have equal length.
    """
    count = len(x_data)
    squared_total = 0
    for y_obs, x_obs in zip(y_data, x_data):
        residual = y_obs - get_u_y(coeff, x_obs)
        squared_total += residual ** 2
    mean_square = squared_total / count
    return np.sqrt(mean_square)

def get_cov_xy(coeff, x_data, y_data):
    """Average product of x-residuals and y-residuals over all index pairs.

    For every combination ``(i, j)`` the x-residual of point ``i``
    (``x_data[i]`` minus the line's x at ``y_data[i]``) is multiplied by the
    y-residual of point ``j`` (``y_data[j]`` minus the line's y at
    ``x_data[j]``). The products are summed and divided by
    ``len(x_data) * len(y_data)``.

    Args:
        coeff: Line coefficients, ``[slope, intercept]``.
        x_data: Observed x values.
        y_data: Observed y values, paired positionally with ``x_data``.

    Returns:
        The mean of the residual products over all ``(i, j)`` pairs.

    NOTE(review): because every x-residual is combined with every
    y-residual, the result is algebraically mean(x-residual) *
    mean(y-residual) rather than a covariance of paired observations.
    Confirm this is the intended statistic.
    """
    pair_count = len(x_data) * len(y_data)
    total = 0
    for i, x_i in enumerate(x_data):
        for j, y_j in enumerate(y_data):
            # Both residuals are evaluated per pair, x-residual first.
            x_resid = x_i - get_u_x(coeff, y_data[i])
            y_resid = y_j - get_u_y(coeff, x_data[j])
            total += x_resid * y_resid
    return total / pair_count

def plot_data():
    """Fit, plot and report residual statistics for each series in ``y_data``.

    For every ``(color, name, series)`` triple taken from the module-level
    ``colors``, ``names`` and ``y_data`` lists, this:

    1. fits a degree-1 polynomial to ``(x_line, series)``,
    2. draws the raw points as a scatter plot and the fitted line on the
       current matplotlib axes, both in the series colour,
    3. prints the values returned by ``get_cov_xy``, ``get_std_x`` and
       ``get_std_y`` followed by their ratio ``cov / (std_x * std_y)``.

    The number of series is taken from the lists themselves instead of a
    hard-coded count, so adding a series only requires extending the three
    lists consistently.

    Returns:
        None. Output goes to the current matplotlib figure and to stdout.
    """
    for color, name, y_series in zip(colors, names, y_data):
        # Degree-1 fit: np.polyfit returns the highest power first, so
        # coeff[0] is the slope and coeff[1] the intercept.
        coeff = np.polyfit(x_line, y_series, 1)

        plt.scatter(x_line, y_series, color=color, label=name)
        plt.plot(x_line, get_u_y(coeff, x_line), color=color)

        cov = get_cov_xy(coeff, x_line, y_series)
        std_x = get_std_x(coeff, x_line, y_series)
        std_y = get_std_y(coeff, x_line, y_series)

        # NOTE(review): for an exact fit both spreads are zero and this
        # becomes 0/0 - confirm whether that case needs explicit handling.
        corr = cov / (std_x * std_y)

        print(cov)
        print(std_x)
        print(std_y)
        print()
        print('corr', name, corr)
        print()


# Script entry: draw every series and print its statistics, then attach the
# legend (built from the label= values given to scatter) and display the figure.
plot_data()
plt.legend()
plt.show()
