🍅빅데이터 분석기사/정리
빅데이터분석기사 실기체험 2번
by 류딩이
2025. 6. 7.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# End-to-end script: load the customer train/test CSVs, clean and one-hot
# encode the features, fit a random forest on the total purchase amount
# ('총구매액'), and write the test-set predictions to result.csv.

train = pd.read_csv("data/customer_train.csv")
test = pd.read_csv("data/customer_test.csv")

# Target: total purchase amount. Only the train set is read for it.
y = train['총구매액']

# Fill missing refund amounts with 0. Each frame is filled from its OWN
# column; filling test from train would overwrite the test data with
# index-aligned train values.
train['환불금액'] = train['환불금액'].fillna(0)
test['환불금액'] = test['환불금액'].fillna(0)

# Drop the member ID column so it is not used as a feature.
train_drop = train.drop(columns='회원ID')
test_drop = test.drop(columns='회원ID')

# One-hot encode the remaining columns of the frames with the ID removed.
train_dummies = pd.get_dummies(train_drop)
test_dummies = pd.get_dummies(test_drop)

# Align the feature columns: keep only columns present in BOTH encoded
# frames, in train's column order, so that fit() and predict() see the same
# features in the same order. The target is excluded explicitly so it can
# never be used as a feature, whether or not the test file contains it.
feature_cols = [
    col
    for col in train_dummies.columns
    if col in test_dummies.columns and col != '총구매액'
]
train_dummies = train_dummies[feature_cols]
test_dummies = test_dummies[feature_cols]

# Fit the model and predict on the test features.
model = RandomForestRegressor()
model.fit(train_dummies, y)
pred = model.predict(test_dummies)

# Build the submission frame: one 'pred' value per test row, in test order.
result = pd.DataFrame({'pred': pred})

# Save the predictions without the index column.
result.to_csv('result.csv', index=False)

# Read the saved file back and print it as a sanity check.
resultfile = pd.read_csv('result.csv')
print(resultfile)