본문 바로가기
🍅빅데이터 분석기사/정리

빅데이터분석기사 실기체험 2번

by 류딩이 2025. 6. 7.
import pandas as pd

def prepare_features(train, test, target='총구매액', id_col='회원ID',
                     fill_col='환불금액'):
    """Build aligned, one-hot encoded feature matrices for train and test.

    The input frames are not modified; all work happens on copies.

    Steps:
      1. Take ``target`` from ``train`` as the label series.
      2. Drop ``id_col`` (and ``target``) so neither is used as a feature.
      3. Fill missing values of ``fill_col`` with 0, each frame from its
         own column.
      4. One-hot encode both frames with ``pd.get_dummies``.
      5. Keep only the columns present in both encoded frames, in the
         order they appear in the encoded train frame.

    Args:
        train: Training frame containing ``target``, ``id_col`` and
            ``fill_col``.
        test: Test frame containing ``id_col`` and ``fill_col``; a
            ``target`` column is dropped if it happens to be present.
        target: Name of the label column.
        id_col: Name of the identifier column to discard.
        fill_col: Name of the column whose missing values become 0.

    Returns:
        A ``(train_features, test_features, y)`` tuple where both feature
        frames have identical columns in identical order.

    Raises:
        KeyError: If a required column is missing from ``train`` or
            ``test``.
    """
    y = train[target]

    # drop() returns new frames, so the caller's data stays untouched.
    train_x = train.drop(columns=[target, id_col])
    test_x = test.drop(columns=[id_col]).drop(columns=[target],
                                              errors='ignore')

    # Each frame must be filled from its OWN column; assigning the train
    # column to the test frame would index-align and overwrite test values.
    train_x[fill_col] = train_x[fill_col].fillna(0)
    test_x[fill_col] = test_x[fill_col].fillna(0)

    train_dummies = pd.get_dummies(train_x)
    test_dummies = pd.get_dummies(test_x)

    # A category seen in only one frame produces a dummy column the other
    # frame lacks; keep the intersection so fit() and predict() agree.
    test_cols = set(test_dummies.columns)
    shared = [col for col in train_dummies.columns if col in test_cols]

    return train_dummies[shared], test_dummies[shared], y


if __name__ == "__main__":
    # Imported here so prepare_features() needs only pandas.
    from sklearn.ensemble import RandomForestRegressor

    train = pd.read_csv("data/customer_train.csv")
    test = pd.read_csv("data/customer_test.csv")

    # Fill missing values, drop unused columns, encode and align columns.
    train_dummies, test_dummies, y = prepare_features(train, test)

    # Train the model and predict on the test set.
    model = RandomForestRegressor()
    model.fit(train_dummies, y)
    pred = model.predict(test_dummies)

    # Collect the predictions into a DataFrame.
    result = pd.DataFrame({'pred': pred})

    # Save to CSV.
    result.to_csv('result.csv', index=False)
    # Read the CSV back to confirm what was written.
    resultfile = pd.read_csv('result.csv')
    print(resultfile)