Extreme Gradient Boosting(XGBoost)은 이전 모델의 오차를 보완하며 강력한 예측 성능을 얻는 Gradient Boosting을 보다 효율적이고 강력하게 개선한 앙상블 알고리즘이다.
# Prediction (regression)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier
# (a train/test split of a tabular dataset) — confirm against the full notebook.
import numpy as np  # fix: required below — params uses np.nan as the missing-value marker
from xgboost import XGBRegressor

# Every XGBRegressor constructor parameter listed explicitly.
# A value of None defers to XGBoost's internal default for that parameter.
params = {
'objective': 'reg:squarederror',  # squared-error regression loss
'base_score': None,
'booster': "gbtree",  # tree-based boosting (vs. "gblinear"/"dart")
'callbacks': None,
'colsample_bylevel': 1,  # column subsampling ratios (1 = use all features)
'colsample_bynode': 1,
'colsample_bytree': 1,
'device': "cpu",
'early_stopping_rounds': None,
'enable_categorical': False,
'eval_metric': None,
'feature_types': None,
'feature_weights': None,
'gamma': 0,  # minimum loss reduction required to split
'grow_policy': "depthwise",
'importance_type': None,
'interaction_constraints': None,
'learning_rate': None,
'max_bin': 256,
'max_cat_threshold': None,
'max_cat_to_onehot': None,
'max_delta_step': 0,
'max_depth': 6,  # maximum tree depth
'max_leaves': 0,  # 0 = no explicit leaf limit
'min_child_weight': 1,
'missing': np.nan,  # value treated as missing in the input data
'monotone_constraints': None,
'multi_strategy': "one_output_per_tree",
'n_estimators': None,
'n_jobs': None,
'num_parallel_tree': 1,
'random_state': 54,  # fixed seed for reproducibility
'reg_alpha': None,  # L1 regularization
'reg_lambda': None,  # L2 regularization
'sampling_method': "uniform",
'scale_pos_weight': 1,
'subsample': 1,  # row subsampling ratio (1 = use all rows)
'tree_method': "auto",
'validate_parameters': None,
'verbosity': 1
}
model = XGBRegressor(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [17.255356 19.46585 34.37832 ... 21.445591 34.170517 44.388393]
# Classification (binary)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier
# (a train/test split of a tabular dataset) — confirm against the full notebook.
import numpy as np  # fix: required below — params uses np.nan as the missing-value marker
from xgboost import XGBClassifier

# Every XGBClassifier constructor parameter listed explicitly.
# A value of None defers to XGBoost's internal default for that parameter.
params = {
'objective': 'binary:logistic',  # binary classification with probability output
'base_score': None,
'booster': "gbtree",  # tree-based boosting (vs. "gblinear"/"dart")
'callbacks': None,
'colsample_bylevel': 1,  # column subsampling ratios (1 = use all features)
'colsample_bynode': 1,
'colsample_bytree': 1,
'device': "cpu",
'early_stopping_rounds': None,
'enable_categorical': False,
'eval_metric': None,
'feature_types': None,
'feature_weights': None,
'gamma': 0,  # minimum loss reduction required to split
'grow_policy': "depthwise",
'importance_type': None,
'interaction_constraints': None,
'learning_rate': None,
'max_bin': 256,
'max_cat_threshold': None,
'max_cat_to_onehot': None,
'max_delta_step': 0,
'max_depth': 6,  # maximum tree depth
'max_leaves': 0,  # 0 = no explicit leaf limit
'min_child_weight': 1,
'missing': np.nan,  # value treated as missing in the input data
'monotone_constraints': None,
'multi_strategy': "one_output_per_tree",
'n_estimators': None,
'n_jobs': None,
'num_parallel_tree': 1,
'random_state': 54,  # fixed seed for reproducibility
'reg_alpha': None,  # L1 regularization
'reg_lambda': None,  # L2 regularization
'sampling_method': "uniform",
'scale_pos_weight': 1,  # class-imbalance weight for the positive class
'subsample': 1,  # row subsampling ratio (1 = use all rows)
'tree_method': "auto",
'validate_parameters': None,
'verbosity': 1
}
model = XGBClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [0 1 1 ... 0 1 0]
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities, shape (n_samples, 2)
# [[5.5266428e-01 4.4733569e-01]
#  [1.1748672e-03 9.9882513e-01]
#  [3.8581157e-01 6.1418843e-01]
#  ...
#  [9.5230120e-01 4.7698796e-02]
#  [7.2717667e-05 9.9992728e-01]
#  [9.9887937e-01 1.1206559e-03]]
Light Gradient Boosting Machine(LightGBM)은 빠르고 메모리 효율적인 Gradient Boosting 기반 알고리즘이다.
# Prediction (regression)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier — confirm
# against the full notebook.
from lightgbm import LGBMRegressor

# LGBMRegressor constructor parameters spelled out explicitly (these match the
# library defaults); dict(...) keyword form instead of a literal for readability.
params = dict(
    boosting_type='gbdt',          # gradient-boosted decision trees
    class_weight=None,
    colsample_bytree=1.0,          # feature subsampling ratio per tree
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,                  # -1 = unlimited depth
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,              # number of boosting rounds
    n_jobs=None,
    num_leaves=31,                 # leaf-count cap per tree
    objective=None,                # None = default regression objective
    random_state=None,
    reg_alpha=0.0,                 # L1 regularization
    reg_lambda=0.0,                # L2 regularization
    subsample=1.0,                 # row subsampling ratio
    subsample_for_bin=200000,
    subsample_freq=0,
)
model = LGBMRegressor(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [18.56952631 20.30663304 35.26689802 ... 20.02686963 33.01882308 38.93578765]
# Classification
# NOTE(review): assumes X_train, y_train, X_test are defined earlier — confirm
# against the full notebook.
from lightgbm import LGBMClassifier

# LGBMClassifier constructor parameters spelled out explicitly (these match the
# library defaults); dict(...) keyword form instead of a literal for readability.
params = dict(
    boosting_type='gbdt',          # gradient-boosted decision trees
    class_weight=None,
    colsample_bytree=1.0,          # feature subsampling ratio per tree
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,                  # -1 = unlimited depth
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,              # number of boosting rounds
    n_jobs=None,
    num_leaves=31,                 # leaf-count cap per tree
    objective=None,                # None = objective inferred from the target
    random_state=None,
    reg_alpha=0.0,                 # L1 regularization
    reg_lambda=0.0,                # L2 regularization
    subsample=1.0,                 # row subsampling ratio
    subsample_for_bin=200000,
    subsample_freq=0,
)
model = LGBMClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [1 1 1 ... 0 1 0]
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities
# [[3.71050638e-01 6.28949362e-01]
#  [3.07202807e-04 9.99692797e-01]
#  [1.13204816e-01 8.86795184e-01]
#  ...
#  [9.66838794e-01 3.31612062e-02]
#  [1.04835184e-04 9.99895165e-01]
#  [9.99730372e-01 2.69628226e-04]]
CatBoost는 범주형(categorical) 변수 처리에 강점을 가진 Gradient Boosting 기반 앙상블 알고리즘이다.
Tabular Prior-data Fitted Network(TabPFN)는 합성 데이터로 사전 학습된 트랜스포머를 이용해, 소규모 표(tabular) 데이터에 대해 별도의 학습 없이 예측을 수행하는 모델이다.
Mitra는 2025년 7월 Amazon Science에서 발표한 표 형식(tabular) 파운데이션 모델로, 혼합 합성 프라이어(mixed synthetic priors)를 활용해 성능을 높인다.
Mitra: Mixed synthetic priors for enhancing tabular foundation models - Amazon Science