Preprocessing

# Cell 1: load_data -> datetime index, interpolate missing values,
# LOD handling (0 -> half of the smallest detected value), derive day of week.
df = load_data(...)
# method='time' interpolates along the datetime index, so the index must be
# a DatetimeIndex by this point.
df = df.infer_objects().interpolate(method='time')
for col in zero_cols:
    # Half of the smallest positive reading stands in for the detection limit.
    lod = df.loc[df[col] > 0, col].min() / 2
    # Assign the result back to the frame: an in-place replace on the `df[col]`
    # selection is chained assignment and can leave `df` unchanged under
    # pandas Copy-on-Write.
    df[col] = df[col].replace(0, lod)
df['dayofweek'] = df.index.dayofweek
# Cell 2: smooth_ccf_area -> CCF up to 60 lags, Savitzky-Golay smoothing -> area.
area = smooth_ccf_area(df[var], df[target])
# NOTE(review): `area_dict` is not populated in this snippet; presumably it maps
# each candidate variable to its `area` value -- confirm against the notebook.
selected_features = []
if area_dict:
    # Compute the median area once instead of once per candidate.
    area_threshold = np.percentile(list(area_dict.values()), 50)
    # Keep the features whose CCF area is at or above the median.
    selected_features = [v for v, a in area_dict.items() if a >= area_threshold]

기본 FNN 회귀 모델

# Log transform + scaling (the 70/15/15 split itself is not shown in this snippet).
# Cap the target at its 99th percentile, then apply log1p.
y_capped = np.clip(y_raw, None, np.percentile(y_raw, 99))
y_log = np.log1p(y_capped)
# StandardScaler only accepts 2-D input, so lift a 1-D target to a single column.
y_2d = np.asarray(y_log, dtype=float)
if y_2d.ndim == 1:
    y_2d = y_2d.reshape(-1, 1)
# Keep the fitted scalers: the target scaler is needed to map predictions back
# to log space (inverse_transform) before undoing log1p with expm1.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X = x_scaler.fit_transform(X_raw)
y = y_scaler.fit_transform(y_2d)
# NOTE(review): the percentile cap and both scalers are fitted on the full data
# set. If the train/validation/test split happens after this point, validation
# and test statistics leak into training -- fit on the training rows only.

# 64 -> 32 units, Dropout(0.3), MSE loss with MAE as a metric, EarlyStopping.
model = Sequential([
    # Input takes a shape tuple (excluding the batch axis), not a bare int.
    Input(shape=(len(selected_features),)),
    Dense(64, activation='relu'), Dropout(0.3),
    Dense(32, activation='relu'), Dropout(0.3),
    Dense(1, activation='linear'),
])
# Pass compile arguments by keyword: the third positional parameter is `metrics`
# in tf.keras 2.x but `loss_weights` in Keras 3, so a positional ['mae'] is not
# portable.
model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mae'])
history = model.fit(
    ...,
    validation_data=...,
    callbacks=[
        # restore_best_weights rolls the model back to the best validation epoch
        # instead of keeping the weights from `patience` epochs later.
        EarlyStopping(
            monitor='val_mae',
            mode='min',
            patience=5,
            restore_best_weights=True,
        ),
    ],
)

시퀀스 모델 (RNN / LSTM / 1D-CNN)

# Build sliding windows (window = 12 time steps).
# Uses `y`, the scaled target defined in the scaling step; the previous name
# `y_scaled` is not defined anywhere in this file.
X_seq, y_seq = make_sequences(X, y, window=12)

# SimpleRNN baseline: one recurrent layer with 64 units, then a single-output head.
rnn = Sequential([
    Input((12, F)),
    SimpleRNN(64, activation='tanh'),
    Dropout(0.3),
    Dense(1),
])

# LSTM variant: one LSTM layer with 64 units, then a single-output head.
lstm = Sequential([
    Input((12, F)),
    LSTM(64, activation='tanh'),
    Dropout(0.3),
    Dense(1),
])

# 1D-CNN: Conv1D(64 filters) -> GlobalMaxPool -> Dense(32) -> single output.
cnn = Sequential([
    Input((12, F)),
    # Conv1D's third positional parameter is `strides`, not `activation`, so the
    # padding mode and activation have to be passed by keyword.
    Conv1D(64, 3, padding='same', activation='relu'),
    GlobalMaxPool1D(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1),
])