인공신경망을 활용한 모델링 (이진 분류)

인공신경망을 활용한 모델링 (이진 분류)

2021. 5. 8. 03:11ㆍ데이터 사이언스/데이터 모델링

ㅇ 사용된 데이터를 통해 고객이 정기예금에 가입할지 안 할지 예측하는 것 (이진 분류)

ㅇ 모듈 임포트

import tensorflow as tf

from tensorflow import feature_column

from tensorflow.keras import layers

from tensorflow.keras.utils import to_categorical, plot_model

ㅇ Y를 원-핫 인코딩한 후 인공신경망에 입력하는 CASE

# Convert a pandas DataFrame into a batched tf.data.Dataset that a Keras model can consume.
def df_to_dataset(dataframe, shuffle=True, batch_size=32,
                  label_col='deposit', num_classes=2):
    """Build a ``tf.data.Dataset`` of ``(features, one-hot labels)`` batches.

    Args:
        dataframe: pandas DataFrame containing the feature columns and the
            label column. It is copied first, so the caller's frame is left
            untouched.
        shuffle: When true, shuffle the examples with a buffer as large as
            the whole frame.
        batch_size: Number of examples per batch.
        label_col: Name of the label column to pop off the frame.
        num_classes: Width of the one-hot label vectors. Labels are expected
            to be integer codes in ``0 .. num_classes - 1`` (requirement of
            ``to_categorical``).

    Returns:
        A batched ``tf.data.Dataset`` whose elements are
        ``(dict of column name -> values, one-hot labels)``.
    """
    dataframe = dataframe.copy()
    labels = dataframe.pop(label_col)
    # Pin the one-hot width: without num_classes, to_categorical infers it
    # from the largest label present, so a split that happens to miss a class
    # would produce labels that no longer match the model's output layer.
    labels = to_categorical(labels, num_classes=num_classes)
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        # shuffle() fills a buffer of `buffer_size` elements and samples from
        # it at random; a buffer covering the whole dataset shuffles it fully.
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds

# Map the DataFrame columns that feed the network into the form TensorFlow
# needs for training. Every column of `x` is registered as a numeric feature.
# NOTE(review): `x` is defined outside this snippet; presumably it is the
# feature-only DataFrame (without the label column) - confirm.
feature_columns1 = [
    feature_column.numeric_column(header) for header in x.columns
]

# DenseFeatures combines the feature columns declared above into a single
# dense input for the layers that follow.
feature_layer1 = tf.keras.layers.DenseFeatures(feature_columns1)

# Data split: hold out 30% of the rows for testing, then take 20% of the
# remaining rows as the validation set.
train3, test3 = train_test_split(total, test_size=0.3)
train3, val3 = train_test_split(train3, test_size=0.2)

# Convert each split into a tf.data.Dataset with df_to_dataset (defined above).
# Only the training split is shuffled; validation and test keep their row
# order.
train_ds3 = df_to_dataset(train3, batch_size=32)
val_ds3, test_ds3 = (
    df_to_dataset(frame, shuffle=False, batch_size=32)
    for frame in (val3, test3)
)

# Model definition: feature layer -> one hidden layer -> 2-unit output
# (one unit per one-hot encoded class).
model = tf.keras.Sequential([
    feature_layer1,
    layers.Dense(128, activation='relu'),
    # The labels are one-hot vectors over two mutually exclusive classes, so
    # the output uses softmax (scores sum to 1) rather than two independent
    # sigmoid units.
    layers.Dense(2, activation='softmax'),
])

model.compile(
    optimizer='adam',
    # categorical_crossentropy is the loss that pairs with one-hot labels and
    # a softmax output.
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model.fit(
    train_ds3,
    validation_data=val_ds3,
    epochs=50,
)

# Accuracy on the held-out test split.
loss, accuracy = model.evaluate(test_ds3)
print("정확도", accuracy)

#오버피팅 확인

history_dict = history.history

%matplotlib inline

acc = history.history['accuracy']

val_acc = history.history['val_accuracy']

epochs = range(1, 51)

plt.plot(epochs, acc, 'r', label = 'Training acc')

plt.plot(epochs, val_acc, 'b', label = 'Validation acc')

plt.title('Training and validaiont acc')

plt.xlabel('Epochs')

plt.ylabel('Acc')

plt.legend()

plt.show()

history_dict = history.history

%matplotlib inline

acc = history.history['loss']

val_acc = history.history['val_loss']

epochs = range(1, 51)

plt.plot(epochs, acc, 'r', label = 'Training loss')

plt.plot(epochs, val_acc, 'b', label = 'Validation loss')

plt.title('Training and validaiont loss')

plt.xlabel('Epochs')

plt.ylabel('loss')

plt.legend()

plt.show()

# Inspect the predictions.
# model.predict returns one row of per-class scores for each test example
# (probabilities, not class labels).
predict = model.predict(test_ds3)

# Sequential.predict_classes has been removed from tf.keras. For a multi-unit
# output layer it was the argmax over the class axis, so compute that directly
# from the scores above; this also avoids a second inference pass.
yhat_classes = predict.argmax(axis=-1)

# Evaluation metrics on the test split.
# NOTE(review): comparing against test3['deposit'] relies on the predictions
# being in the same row order as test3, i.e. the test dataset must have been
# built without shuffling - confirm.
accuracy = accuracy_score(test3['deposit'], yhat_classes)
print(accuracy)
print(f'Accuracy: {accuracy:f}')

# precision: tp / (tp + fp)
precision = precision_score(test3['deposit'], yhat_classes)
print(f'Precision: {precision:f}')

# recall: tp / (tp + fn)
recall = recall_score(test3['deposit'], yhat_classes)
print(f'Recall: {recall:f}')

# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(test3['deposit'], yhat_classes)
print(f'F1 score: {f1:f}')

# kappa
kappa = cohen_kappa_score(test3['deposit'], yhat_classes)
print(f'Cohens kappa: {kappa:f}')

# ROC AUC: a ranking metric, so it needs the predicted score of the positive
# class rather than hard 0/1 labels (hard labels collapse the ROC curve to a
# single threshold). Column 1 of `predict` is the score for class 1.
# NOTE(review): assumes 'deposit' is coded 0/1 with 1 as the positive class.
auc = roc_auc_score(test3['deposit'], predict[:, 1])
print(f'ROC AUC: {auc:f}')

# confusion matrix
matrix = confusion_matrix(test3['deposit'], yhat_classes)
print(matrix)

'데이터 사이언스 > 데이터 모델링' 카테고리의 다른 글

모델링 꿀팁 (0)	2021.06.01
모델 실행 코드 (0)	2021.05.08

KS-STORY

KS-STORY

태그

최근글

댓글

공지사항

아카이브

'데이터 사이언스 > 데이터 모델링' 카테고리의 다른 글

관련글

티스토리툴바