Extreme Gradient Boosting(XGBoost)은 이전 모델의 오차를 보완하며 강력한 예측 성능을 얻는 Gradient Boosting을 보다 효율적이고 강력하게 개선한 앙상블 알고리즘이다.
# Prediction (regression)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier
# (a train/test split of a tabular dataset) — confirm against the full notebook.
import numpy as np  # fix: required below — params uses np.nan as the missing-value marker
from xgboost import XGBRegressor

# Every XGBRegressor constructor parameter listed explicitly.
# A value of None defers to XGBoost's internal default for that parameter.
params = {
'objective': 'reg:squarederror',  # squared-error regression loss
'base_score': None,
'booster': "gbtree",  # tree-based boosting (vs. "gblinear"/"dart")
'callbacks': None,
'colsample_bylevel': 1,  # column subsampling ratios (1 = use all features)
'colsample_bynode': 1,
'colsample_bytree': 1,
'device': "cpu",
'early_stopping_rounds': None,
'enable_categorical': False,
'eval_metric': None,
'feature_types': None,
'feature_weights': None,
'gamma': 0,  # minimum loss reduction required to split
'grow_policy': "depthwise",
'importance_type': None,
'interaction_constraints': None,
'learning_rate': None,
'max_bin': 256,
'max_cat_threshold': None,
'max_cat_to_onehot': None,
'max_delta_step': 0,
'max_depth': 6,  # maximum tree depth
'max_leaves': 0,  # 0 = no explicit leaf limit
'min_child_weight': 1,
'missing': np.nan,  # value treated as missing in the input data
'monotone_constraints': None,
'multi_strategy': "one_output_per_tree",
'n_estimators': None,
'n_jobs': None,
'num_parallel_tree': 1,
'random_state': 54,  # fixed seed for reproducibility
'reg_alpha': None,  # L1 regularization
'reg_lambda': None,  # L2 regularization
'sampling_method': "uniform",
'scale_pos_weight': 1,
'subsample': 1,  # row subsampling ratio (1 = use all rows)
'tree_method': "auto",
'validate_parameters': None,
'verbosity': 1
}
model = XGBRegressor(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [17.255356 19.46585 34.37832 ... 21.445591 34.170517 44.388393]
# Classification (binary)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier
# (a train/test split of a tabular dataset) — confirm against the full notebook.
import numpy as np  # fix: required below — params uses np.nan as the missing-value marker
from xgboost import XGBClassifier

# Every XGBClassifier constructor parameter listed explicitly.
# A value of None defers to XGBoost's internal default for that parameter.
params = {
'objective': 'binary:logistic',  # binary classification with probability output
'base_score': None,
'booster': "gbtree",  # tree-based boosting (vs. "gblinear"/"dart")
'callbacks': None,
'colsample_bylevel': 1,  # column subsampling ratios (1 = use all features)
'colsample_bynode': 1,
'colsample_bytree': 1,
'device': "cpu",
'early_stopping_rounds': None,
'enable_categorical': False,
'eval_metric': None,
'feature_types': None,
'feature_weights': None,
'gamma': 0,  # minimum loss reduction required to split
'grow_policy': "depthwise",
'importance_type': None,
'interaction_constraints': None,
'learning_rate': None,
'max_bin': 256,
'max_cat_threshold': None,
'max_cat_to_onehot': None,
'max_delta_step': 0,
'max_depth': 6,  # maximum tree depth
'max_leaves': 0,  # 0 = no explicit leaf limit
'min_child_weight': 1,
'missing': np.nan,  # value treated as missing in the input data
'monotone_constraints': None,
'multi_strategy': "one_output_per_tree",
'n_estimators': None,
'n_jobs': None,
'num_parallel_tree': 1,
'random_state': 54,  # fixed seed for reproducibility
'reg_alpha': None,  # L1 regularization
'reg_lambda': None,  # L2 regularization
'sampling_method': "uniform",
'scale_pos_weight': 1,  # class-imbalance weight for the positive class
'subsample': 1,  # row subsampling ratio (1 = use all rows)
'tree_method': "auto",
'validate_parameters': None,
'verbosity': 1
}
model = XGBClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [0 1 1 ... 0 1 0]
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities, shape (n_samples, 2)
# [[5.5266428e-01 4.4733569e-01]
#  [1.1748672e-03 9.9882513e-01]
#  [3.8581157e-01 6.1418843e-01]
#  ...
#  [9.5230120e-01 4.7698796e-02]
#  [7.2717667e-05 9.9992728e-01]
#  [9.9887937e-01 1.1206559e-03]]
Light Gradient Boosting Machine(LightGBM)은 빠르고 메모리 효율적인 Gradient Boosting 기반 알고리즘이다.
# Prediction (regression)
# NOTE(review): assumes X_train, y_train, X_test are defined earlier — confirm
# against the full notebook.
from lightgbm import LGBMRegressor

# LGBMRegressor constructor parameters spelled out explicitly (these match the
# library defaults); dict(...) keyword form instead of a literal for readability.
params = dict(
    boosting_type='gbdt',          # gradient-boosted decision trees
    class_weight=None,
    colsample_bytree=1.0,          # feature subsampling ratio per tree
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,                  # -1 = unlimited depth
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,              # number of boosting rounds
    n_jobs=None,
    num_leaves=31,                 # leaf-count cap per tree
    objective=None,                # None = default regression objective
    random_state=None,
    reg_alpha=0.0,                 # L1 regularization
    reg_lambda=0.0,                # L2 regularization
    subsample=1.0,                 # row subsampling ratio
    subsample_for_bin=200000,
    subsample_freq=0,
)
model = LGBMRegressor(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [18.56952631 20.30663304 35.26689802 ... 20.02686963 33.01882308 38.93578765]
# Classification
# NOTE(review): assumes X_train, y_train, X_test are defined earlier — confirm
# against the full notebook.
from lightgbm import LGBMClassifier

# LGBMClassifier constructor parameters spelled out explicitly (these match the
# library defaults); dict(...) keyword form instead of a literal for readability.
params = dict(
    boosting_type='gbdt',          # gradient-boosted decision trees
    class_weight=None,
    colsample_bytree=1.0,          # feature subsampling ratio per tree
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,                  # -1 = unlimited depth
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,              # number of boosting rounds
    n_jobs=None,
    num_leaves=31,                 # leaf-count cap per tree
    objective=None,                # None = objective inferred from the target
    random_state=None,
    reg_alpha=0.0,                 # L1 regularization
    reg_lambda=0.0,                # L2 regularization
    subsample=1.0,                 # row subsampling ratio
    subsample_for_bin=200000,
    subsample_freq=0,
)
model = LGBMClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# [1 1 1 ... 0 1 0]
y_pred_proba = model.predict_proba(X_test)  # per-class probabilities
# [[3.71050638e-01 6.28949362e-01]
#  [3.07202807e-04 9.99692797e-01]
#  [1.13204816e-01 8.86795184e-01]
#  ...
#  [9.66838794e-01 3.31612062e-02]
#  [1.04835184e-04 9.99895165e-01]
#  [9.99730372e-01 2.69628226e-04]]
CatBoost는 범주형(categorical) 변수 처리에 강점을 가진 Gradient Boosting 기반 앙상블 알고리즘이다.
Tabular Prior-data Fitted Network(TabPFN)는 합성 데이터로 사전 학습된 트랜스포머를 이용해, 소규모 표(tabular) 데이터에 대해 별도의 학습 없이 예측을 수행하는 모델이다.
Mitra는 2025년 7월 Amazon Science에서 발표한 표 형식(tabular) 파운데이션 모델로, 혼합 합성 프라이어(mixed synthetic priors)를 활용해 성능을 높인다.
Mitra: Mixed synthetic priors for enhancing tabular foundation models - Amazon Science