티스토리 뷰

# Core libraries for the exercise: sklearn for ML, pandas for data wrangling,
# seaborn/matplotlib for plots. (Duplicate pandas/matplotlib imports removed.)
import sklearn as sk
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Driving-record dataset; assumes A0007IT.json sits in the working directory.
df = pd.read_json("A0007IT.json")

# Use a Korean-capable font so Hangul axis labels render instead of boxes.
plt.rc('font', family='NanumGothicCoding')

4. 분포도
count plot

# Show how many records fall in each Address1 region.
sns.countplot(data=df, x='Address1')
plt.show()

# Rows whose Address1 is the '-' placeholder carry no usable region info;
# remove them (after plotting, so the raw distribution is still visible).
placeholder_rows = df['Address1'] == '-'
df.drop(df[placeholder_rows].index, inplace=True)


5. joint plot

# Joint scatter + marginal distributions of driving time vs. speed,
# to eyeball the relationship and spot outliers before cleaning.
sns.jointplot(x='Time_Driving', y='Speed_Per_Hour', data=df)
plt.show()

6. 300이 넘는 이상치 1개 삭제

# Remove the single speed outlier above 300 (per the task statement),
# then snapshot the cleaned frame before missing-value handling.
over_limit = df['Speed_Per_Hour'] > 300
df.drop(df[over_limit].index, inplace=True)
df_temp = df.copy()

7. 결측치 처리

# Count NaNs per column before cleaning.
print(df_temp.isna().sum())
# Drop every row containing at least one missing value.
df_na = df_temp.dropna()
# Confirm no NaNs remain after the drop.
print(df_na.isna().sum())

8. 불필요한 변수 삭제
df_del = df_na.drop(['Time_Departure', 'Time_Arrival'], axis = 1)

9. one-hot encoding
# Inspect column dtypes to see which columns are categorical.
df_del.info()
# One-hot encode every object-dtype (categorical) column in one pass.
categorical_cols = list(df_del.select_dtypes(include='object').columns)
df_preset = pd.get_dummies(data=df_del, columns=categorical_cols)


원하는 데이터타입 컬럼 추출: select_dtypes

10. 훈련과 검증 데이터셋 분리

from sklearn.model_selection import train_test_split

# Time_Driving is the regression target; every other column is a feature.
target = 'Time_Driving'
y = df_preset[target]
X = df_preset.drop(target, axis=1)

# 80/20 train/validation split, seeded for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42)

11. 머신러닝 모델. 의사결정나무로 학습 진행

from sklearn.tree import DecisionTreeRegressor

# Shallow tree (depth capped at 5) to limit overfitting; fixed seed so the
# fitted tree is reproducible.
model = DecisionTreeRegressor(
    max_depth=5,
    min_samples_split=3,
    random_state=120,
)
model.fit(X_train, y_train)

12. 위 의사결정나무 모델의 성능 평가. 예측 결과의 mae(Mean Absolute Error)를 구하세요.

from sklearn.metrics import mean_absolute_error

# Evaluate the tree on the hold-out set with MAE (lower is better).
y_pred = model.predict(X_valid)
dt_mae = mean_absolute_error(y_valid, y_pred)


import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
# NOTE(review): load_model, Activation, EarlyStopping, ModelCheckpoint and
# to_categorical are imported but never used below (this is a regression task).

# Fix TensorFlow's RNG so weight initialization (and thus training) repeats.
tf.random.set_seed(1)

13. Time_Driving(실주행시간)을 예측하는 딥러닝 모델

from sklearn.metrics import mean_squared_error

# Regression MLP for Time_Driving: three shrinking ReLU hidden layers
# (64 -> 32 -> 16), each followed by batch-norm and 20% dropout, ending
# in a single linear output unit.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(1))

# MSE loss matches the regression objective; Adam with default settings.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train 30 epochs, scoring the hold-out set after every epoch.
history = model.fit(
    X_train, y_train,
    batch_size=16,
    epochs=30,
    validation_data=(X_valid, y_valid),
)


14. 위 딥러닝 모델의 성능 평가. Matplotlib 라이브러리 활용해서 학습 mse와 검증 mse를 그래프로 표시

# Plot training vs. validation loss (MSE) per epoch from the fit history.
mse = history.history['loss']
val_mse = history.history['val_loss']

epoch_axis = range(1, len(mse) + 1)

plt.plot(epoch_axis, mse, 'b', label='mse')
plt.plot(epoch_axis, val_mse, 'r', label='val_mse')
plt.title('Model MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()

 

null 값 체크 함수

print(df_temp.isnull().sum())

====================================================

import sklearn as sk

import pandas as pd
df=pd.read_json("A0007IT.json")

import seaborn as sns
# Drop rows whose Address1 is the '-' placeholder, then plot the distribution.
df.drop(df[(df['Address1']=='-')].index, inplace=True)
sns.countplot(x="Address1", data=df)

sns.jointplot(x="Time_Driving", y="Speed_Per_Hour", data=df)

# BUG FIX: the original combined inplace=True with an assignment
# (df_temp = df.drop(..., inplace=True)); drop() returns None when
# inplace=True, so df_temp was None and df_temp.dropna() crashed.
# Drop without inplace and keep the returned frame. Also use > 300
# ("over 300" per the task, matching the first version above).
df_temp = df.drop(df[df['Speed_Per_Hour'] > 300].index)

# Remove rows with any missing value.
df_na = df_temp.dropna(axis=0)

# BUG FIX: derive df_del from df_na (outlier- and NaN-cleaned), not from df,
# so the missing-value handling actually reaches the model inputs.
df_del = df_na.drop(['Time_Departure', 'Time_Arrival'], axis=1)

# One-hot encode all object-dtype (categorical) columns.
cols = df_del.select_dtypes('object').columns

df_preset = pd.get_dummies(data=df_del, columns=cols)


from sklearn.model_selection import train_test_split

# Time_Driving is the regression target; every other column is a feature.
target = 'Time_Driving'
y = df_preset[target]
X = df_preset.drop('Time_Driving', axis=1)

# 80/20 train/validation split, seeded for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)


from sklearn.tree import DecisionTreeRegressor

# Decision-tree baseline; depth capped at 5 to curb overfitting, fixed seed.
model = DecisionTreeRegressor(max_depth=5, min_samples_split=3, random_state=120)
model.fit(X_train, y_train)
from sklearn.metrics import mean_absolute_error

# Hold-out MAE of the tree model (lower is better).
y_pred = model.predict(X_valid)
dt_mae = mean_absolute_error(y_valid, y_pred)


from sklearn.metrics import mean_squared_error

# Regression MLP for Time_Driving: three shrinking ReLU hidden layers
# (64 -> 32 -> 16), each followed by batch-norm and 20% dropout, ending
# in a single linear output unit. Relies on the keras names (Sequential,
# Dense, ...) imported earlier in this file.
model = Sequential([
    Dense(64, activation='relu',
          input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.2),

    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(1)
])

# MSE loss matches the regression objective; Adam with default settings.
model.compile(optimizer = 'adam',
              loss = 'mean_squared_error')

# Train 30 epochs, scoring the hold-out set after every epoch.
history = model.fit(X_train, y_train,
                    batch_size = 16,
                    epochs = 30,
                    validation_data=(X_valid, y_valid))

# Plot training vs. validation loss (MSE) per epoch from the fit history.
mse = history.history['loss']
val_mse = history.history['val_loss']

epochs = range(1, len(mse) + 1)

plt.plot(epochs, mse, 'b', label='mse')
plt.plot(epochs, val_mse, 'r', label='val_mse')

plt.title('Model MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()

 

import sklearn as sk

import pandas as pd
df=pd.read_json("A0007IT.json")

import seaborn as sns
# Drop rows whose Address1 is the '-' placeholder, then plot the distribution.
df.drop(df[(df['Address1']=='-')].index, inplace=True)
sns.countplot(x="Address1", data=df)

sns.jointplot(x="Time_Driving", y="Speed_Per_Hour", data=df)

# BUG FIX: the original combined inplace=True with an assignment
# (df_temp = df.drop(..., inplace=True)); drop() returns None when
# inplace=True, so df_temp was None and df_temp.dropna() crashed.
# Drop without inplace and keep the returned frame. Also use > 300
# ("over 300" per the task, matching the first version in this file).
df_temp = df.drop(df[df['Speed_Per_Hour'] > 300].index)

# Remove rows with any missing value.
df_na = df_temp.dropna(axis=0)

# BUG FIX: derive df_del from df_na (outlier- and NaN-cleaned), not from df,
# so the missing-value handling actually reaches the model inputs.
df_del = df_na.drop(['Time_Departure', 'Time_Arrival'], axis=1)

# One-hot encode all object-dtype (categorical) columns.
cols = df_del.select_dtypes('object').columns

df_preset = pd.get_dummies(data=df_del, columns=cols)


from sklearn.model_selection import train_test_split

# Time_Driving is the regression target; every other column is a feature.
target = 'Time_Driving'
y = df_preset[target]
X = df_preset.drop('Time_Driving', axis=1)

# 80/20 train/validation split, seeded for reproducibility.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)


from sklearn.tree import DecisionTreeRegressor

# Decision-tree baseline; depth capped at 5 to curb overfitting, fixed seed.
model = DecisionTreeRegressor(max_depth=5, min_samples_split=3, random_state=120)
model.fit(X_train, y_train)
from sklearn.metrics import mean_absolute_error

# Hold-out MAE of the tree model (lower is better).
y_pred = model.predict(X_valid)
dt_mae = mean_absolute_error(y_valid, y_pred)


from sklearn.metrics import mean_squared_error

# Regression MLP for Time_Driving: three shrinking ReLU hidden layers
# (64 -> 32 -> 16), each followed by batch-norm and 20% dropout, ending
# in a single linear output unit. Relies on the keras names (Sequential,
# Dense, ...) imported earlier in this file.
model = Sequential([
    Dense(64, activation='relu',
          input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.2),

    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(16, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(1)
])

# MSE loss matches the regression objective; Adam with default settings.
model.compile(optimizer = 'adam',
              loss = 'mean_squared_error')

# Train 30 epochs, scoring the hold-out set after every epoch.
history = model.fit(X_train, y_train,
                    batch_size = 16,
                    epochs = 30,
                    validation_data=(X_valid, y_valid))

# Plot training vs. validation loss (MSE) per epoch from the fit history.
mse = history.history['loss']
val_mse = history.history['val_loss']

epochs = range(1, len(mse) + 1)

plt.plot(epochs, mse, 'b', label='mse')
plt.plot(epochs, val_mse, 'r', label='val_mse')

plt.title('Model MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()

 

https://velog.io/@harii-in/KT-AIVLE-4%EA%B8%B0-AICE-%EB%8C%80%EB%B9%84

 

[KT AIVLE 4기] AICE 대비

📌 5차 미니 프로젝트 - AICE 대비

velog.io

 

'AICE(AI Certificate for Everyone) > AICE Associate' 카테고리의 다른 글

[파이썬] DecisionTree 초 심플 예제  (0) 2024.02.19
aice associate  (0) 2024.02.13
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2024/05   »
1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
글 보관함