소스 참조 | Notion

<https://github.com/taehojo/deeplearning>

10시 51분

import pandas as pd

fish = pd.read_csv('<https://bit.ly/fish_csv_data>')
fish.head(5)
# 문제, 정답 분리
fish_input = fish[['Weight','Length','Diagonal','Height','Width']].to_numpy()
fish_target = fish['Species'].to_numpy()

from sklearn.model_selection import train_test_split
train_input, test_input, train_target, test_target = train_test_split(
    fish_input, fish_target, random_state=42)

test_input, add_input, test_target, add_target = train_test_split( 
    test_input, test_target, random_state=42)

09시 32분

import numpy as np

perch_length = np.array(
    [8.4, 13.7, 15.0, 16.2, 17.4, 18.0, 18.7, 19.0, 19.6, 20.0,
     21.0, 21.0, 21.0, 21.3, 22.0, 22.0, 22.0, 22.0, 22.0, 22.5,
     22.5, 22.7, 23.0, 23.5, 24.0, 24.0, 24.6, 25.0, 25.6, 26.5,
     27.3, 27.5, 27.5, 27.5, 28.0, 28.7, 30.0, 32.8, 34.5, 35.0,
     36.5, 36.0, 37.0, 37.0, 39.0, 39.0, 39.0, 40.0, 40.0, 40.0,
     40.0, 42.0, 43.0, 43.0, 43.5, 44.0]
     )
perch_weight = np.array(
    [5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0, 85.0, 85.0,
     110.0, 115.0, 125.0, 130.0, 120.0, 120.0, 130.0, 135.0, 110.0,
     130.0, 150.0, 145.0, 150.0, 170.0, 225.0, 145.0, 188.0, 180.0,
     197.0, 218.0, 300.0, 260.0, 265.0, 250.0, 250.0, 300.0, 320.0,
     514.0, 556.0, 840.0, 685.0, 700.0, 700.0, 690.0, 900.0, 650.0,
     820.0, 850.0, 900.0, 1015.0, 820.0, 1100.0, 1000.0, 1100.0,
     1000.0, 1000.0]
     )

from sklearn.model_selection import train_test_split

# 훈련 세트와 테스트 세트로 나눕니다
train_input, test_input, train_target, test_target = train_test_split(
    perch_length, perch_weight, random_state=42)
# 훈련 세트와 테스트 세트를 2차원 배열로 바꿉니다
train_input = train_input.reshape(-1, 1)
test_input = test_input.reshape(-1, 1)

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
# 선형 회귀 모델 훈련
lr.fit(train_input, train_target)

print(lr.coef_, lr.intercept_)
# a = 39.01714, b = -709.01864

09시 09분

import numpy as np
import matplotlib.pyplot as plt 
#공부 시간 X와 성적 Y의 넘파이 배열을 만듭니다.
x = np.array([2, 4, 6, 8])
y = np.array([81, 93, 91, 97]) 
 
# 기울기 a와 절편 b의 값을 초기화합니다.
a = 0
b = 0
 
#학습률을 정합니다.
lr = 0.03
 
#몇 번 반복될지를 설정합니다. 
epochs = 1000
 
# x 값이 총 몇 개인지 셉니다.
# x 값이 총 몇 개인지 셉니다.
n=len(x)
 
#경사 하강법을 시작합니다.
t1 = []
t2 = []
for i in range(epochs):                  # epoch 수 만큼 반복
    
    y_pred = a * x + b                   # 예측 값을 구하는 식입니다. 
    error = y - y_pred                   # 실제 값과 비교한 오차를 error로 놓습니다.
    # print(error)
    # 결과
    # [81 93 91 97]
    
    # 평균제곱 오차를 구한다.
    a_diff = (2/n) * sum(-x * (error))   # 오차 함수를 a로 편미분한 값입니다. 
    b_diff = (2/n) * sum(-(error))       # 오차 함수를 b로 편미분한 값입니다. 
    t2.append(a_diff)
 
    #
    a = a - lr * a_diff     # 학습률을 곱해 기존의 a 값을 업데이트합니다.
    b = b - lr * b_diff     # 학습률을 곱해 기존의 b 값을 업데이트합니다.
    t1.append(a)
    
    #print(a_diff, a)
    # 결과
    # -928.0 27.84
 
# ( 기울기, 오차 )
plt.xlabel('gradient(a)') 
plt.ylabel('lose value') 
 
plt.scatter(t1, t2, alpha=0.05)#, c = order)
print(a, b)