0901 TensorFlow

SMALL

TensorFlow는 open source software library예요.

for Numerical computation - 수치계산을 위한 라이브러리

Using data flow graphs - Node와 Edge로 구성된 방향성 있는 그래프

Node : 실제 Numerical operation(수치 연산), 데이터 입출력 담당

다른 Node 간에 데이터가 흐르는 길을 표현하는 게 Edge예요.

Edge는 방향성이 있어요!

Edge를 통해서 흘러가는 데이터를 Tensor(동적 크기의 다차원 배열)라고 해요.

1.x 버전

2.x 버전(keras)

버전 간 호환성이 없어요! (보이는 측면에서 조금 변했어요, 내부적으로 많이 변했어요)

1.15 버전 먼저 사용해 보아요!

anaconda → 가상환경 이동 → conda install tensorflow==1.15

버전이 다를경우

conda uninstall tensorflow # 현재 설치되어 있는 TensorFlow를 삭제

버전 확인!!

import tensorflow

# Sanity-check the installed release; this walkthrough expects 1.15.0.
print(tensorflow.__version__)

import tensorflow as tf

# Build a graph that contains a single constant string node.
node = tf.constant('Hello World')
# Printing the node itself shows only tensor metadata, e.g.
#   Tensor("Const_2:0", shape=(), dtype=string)
# rather than the stored value.

# In TensorFlow 1.x a graph node is only evaluated when it is run through
# a Session.
sess = tf.Session()

# The evaluated string constant is decoded before printing.
hello_bytes = sess.run(node)
print(hello_bytes.decode())  # Hello World

import tensorflow as tf

# Two constant nodes; the data type is stated explicitly.
node1 = tf.constant(10, dtype=tf.float32)
node2 = tf.constant(20, dtype=tf.float32)

# Node that adds the two constants.
node3 = tf.add(node1, node2)

sess = tf.Session()

print(sess.run(node3))  # 30.0

# Passing a list to run() evaluates each listed node.
print(sess.run([node3, node1]))  # [30.0, 10.0]

# Take two numbers as run-time input and print their sum.

import tensorflow as tf

# A placeholder is a node that holds no value when the graph is built;
# the value is supplied when the graph is run. No shape is given here and
# scalars are fed below.
node1 = tf.placeholder(dtype=tf.float32)
node2 = tf.placeholder(dtype=tf.float32)  # fixed: stray ')' was a SyntaxError

node3 = node1 + node2

sess = tf.Session()

# print(sess.run(node3)) would fail: placeholders must be given values
# at run time. The values are passed as a dictionary via feed_dict.
print(sess.run(node3, feed_dict={node1: 50,
                                 node2: 100}))  # 150.0

# tensor flow의 기본적인 사용법을 배웠으니 이를 이용해서 Multiple Linear Regression을 구현
# 태양광, 바람, 온도에 따른 오존량 예측에 대한 머신러닝 코드를 작성해 보아요!
# %reset

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy import stats # zscore로 이상치 판별
from sklearn import linear_model # sklearn으로 모델 생성, 학습, 예측하기 위해서 필요!
from sklearn.preprocessing import MinMaxScaler   # Normalization을 위해 필요!

# Raw data loading
df = pd.read_csv('./data/ozone/ozone.csv', sep=',')

# Keep the three features plus the target. 'Ozone' must stay the last column
# because the feature slices below use iloc[:, :-1].
training_data_set = df[['Solar.R', 'Wind', 'Temp', 'Ozone']]

# 1. Missing values: drop every row that contains a NaN.
#    (The original notes report shape (111, 4) at this point.)
training_data = training_data_set.dropna(how='any')

# 2. Outliers: discard rows whose Ozone z-score magnitude exceeds the threshold.
zscore_threshold = 1.8

outlier = training_data['Ozone'][np.abs(stats.zscore(training_data['Ozone'])) > zscore_threshold]
# Take an explicit copy so the in-place normalization below modifies an
# independent frame instead of a slice of `df` (avoids SettingWithCopyWarning).
# (The original notes report 104 rows x 4 columns at this point.)
training_data = training_data.loc[~(training_data['Ozone'].isin(outlier))].copy()
# Cast to float up front: the scaled values are floats, and writing floats
# into an integer column through iloc is deprecated in recent pandas.
training_data = training_data.astype('float64')

# 3. Normalization - Min-Max scaling, with separate scalers for the features
#    and the target so each can be transformed / inverse-transformed alone.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

scaler_x.fit(training_data.iloc[:, :-1].values)
scaler_y.fit(training_data['Ozone'].values.reshape(-1, 1))

training_data.iloc[:, :-1] = scaler_x.transform(training_data.iloc[:, :-1].values)
# transform() returns an (n, 1) array; flatten it before assigning to a column.
training_data['Ozone'] = scaler_y.transform(training_data['Ozone'].values.reshape(-1, 1)).ravel()

# Training data set
x_data = training_data.iloc[:, :-1].values
t_data = training_data['Ozone'].values.reshape(-1, 1)

###################### data preparation complete ##########################

# Machine-learning code written with TensorFlow.

# 1. Input nodes (placeholders) that receive the training data set.
#    The inputs are not scalars, so a shape is declared. The row dimension is
#    None so that any number of rows can be fed; a fixed row count such as
#    shape=[110, 3] would force prediction inputs to have that many rows too.
X = tf.placeholder(shape=[None, 3], dtype=tf.float32)
T = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# 2. Weight & bias
#    NumPy equivalent: W = np.random.rand(3, 1), b = np.random.rand(1)
#    Here they are TensorFlow variables, initialized from tf.random.normal
#    with the shape passed as a list.
W = tf.Variable(tf.random.normal([3, 1]), name='weight')
b = tf.Variable(tf.random.normal([1]), name='bias')

# 3. Multiple linear regression model => hypothesis
#    NumPy equivalent: H = np.dot(X, W) + b
H = tf.matmul(X, W) + b

# 4. Loss function - the error used to update W and b.
#    NumPy equivalent: loss = np.mean(np.power((H - T), 2))
residual = H - T
loss = tf.reduce_mean(tf.square(residual))

# 5. Training node: each time it is run, it performs one gradient-descent
#    step that updates W and b to reduce the loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-4)
train = optimizer.minimize(loss)

#### graph definition complete ####

# 6. Session & initialization: TensorFlow variables are initialized before
#    they are used in a session. global_variables_initializer() is the 1.x
#    initialization call (the original notes say it is absent in 2.0).
sess = tf.Session()   # session used to execute the graph
sess.run(tf.global_variables_initializer())

# Repeated training using the train node; the same full data set is fed on
# every step, so the feed dictionary is built once.
training_feed = {X: x_data, T: t_data}

for step in range(300000):

    # Running a node returns its value; the train node's result is unused.
    _, W_val, b_val, loss_val = sess.run([train, W, b, loss],
                                         feed_dict=training_feed)
    if step % 30000 != 0:
        continue
    print('W:{}, b:{}, loss:{}'.format(W_val, b_val, loss_val))

# Sample output recorded in the original notes:
# W:[[ 0.19102935]
# [-0.5484885 ]
# [ 0.49159238]], b:[0.25061813], loss:0.02588055282831192

# Training is finished, so W and b hold their learned values.
# Prediction

# One sample in 2-D form: Solar.R, Wind, Temp
predict_data = np.array([[180.0, 10.0, 80.0]])

# Scale the sample with the same feature scaler used for training.
scaled_predict_data = scaler_x.transform(predict_data)

prediction_feed = {X: scaled_predict_data}
scaled_result = sess.run(H, feed_dict=prediction_feed)

# Map the scaled prediction back to the original Ozone range.
result = scaler_y.inverse_transform(scaled_result.reshape(-1, 1))

print(result)

# Recorded output for input [180.0, 10.0, 80.0]: [[40.86704]]

sklearn으로 구현

# Raw data loading
df = pd.read_csv('./data/ozone/ozone.csv', sep=',')

# Three features plus the target, with 'Ozone' as the last column.
training_data_set = df[['Solar.R', 'Wind', 'Temp', 'Ozone']]

# 1. Missing values: drop every row that contains a NaN.
#    (The original notes report shape (111, 4) at this point.)
training_data = training_data_set.dropna(how='any')

# 2. Outliers: remove rows whose Ozone z-score magnitude exceeds the threshold.
zscore_threshold = 1.8

ozone_zscores = stats.zscore(training_data['Ozone'])
outlier = training_data['Ozone'][np.abs(ozone_zscores) > zscore_threshold]
is_outlier = training_data['Ozone'].isin(outlier)
training_data = training_data.loc[~is_outlier]
# (The original notes report 104 rows x 4 columns at this point.)

# Normalization is skipped for this run.
x_data = training_data.iloc[:, :-1].values
t_data = training_data['Ozone'].values.reshape(-1, 1)

# Create the model, train it, and predict for one sample.
model = linear_model.LinearRegression()
model.fit(x_data, t_data)
sklearn_result = model.predict([[180.0, 10.0, 80.0]])
print(sklearn_result)

# Recorded output: [[41.59545428]]

LIST

'머신러닝 딥러닝' 카테고리의 다른 글

0902 Logistic Regression Model (0)	2021.09.02
0901 Classification (0)	2021.09.01
0831 MultipleLlinear Regression (0)	2021.08.31
0831 정규화(Normalization) (0)	2021.08.31
0830 linear regression 데이터 전처리 (0)	2021.08.30

대금부는개발자

0901 TensorFlow

'머신러닝 딥러닝' 카테고리의 다른 글

댓글

티스토리툴바

0901 TensorFlow

'머신러닝 딥러닝' 카테고리의 다른 글

관련글

댓글

티스토리툴바