@@ -24,6 +24,7 @@ def fit(
     config: Optional[dict] = None,
     target_label: str = "Decision",
     validation_df: Optional[pd.DataFrame] = None,
+    silent: bool = False,
 ) -> Dict[str, Any]:
     """
     Build (a) decision tree model(s)
@@ -55,6 +56,9 @@ def fit(
             if nothing is passed to validation data frame, then the function validates
             built trees for training data frame
 
+        silent (bool): set this to True if you do not want to see
+            any informative logs
+
     Returns:
         chefboost model
     """
@@ -139,7 +143,8 @@ def fit(
 
     if enableParallelism == True:
         num_cores = config["num_cores"]
-        logger.info(f"[INFO]: {num_cores} CPU cores will be allocated in parallel running")
+        if silent is False:
+            logger.info(f"[INFO]: {num_cores} CPU cores will be allocated in parallel running")
 
         from multiprocessing import set_start_method, freeze_support
 
@@ -169,7 +174,8 @@ def fit(
         config["algorithm"] = "Regression"
 
     if enableGBM == True:
-        logger.info("Gradient Boosting Machines...")
+        if silent is False:
+            logger.info("Gradient Boosting Machines...")
         algorithm = "Regression"
         config["algorithm"] = "Regression"
 
@@ -184,7 +190,8 @@ def fit(
 
     # -------------------------
 
-    logger.info(f"{algorithm} tree is going to be built...")
+    if silent is False:
+        logger.info(f"{algorithm} tree is going to be built...")
 
     # initialize a dictionary. this is going to be used to check features numeric or nominal.
     # numeric features should be transformed to nominal values based on scales.
@@ -212,7 +219,13 @@ def fit(
 
     if enableAdaboost == True:
         trees, alphas = adaboost_clf.apply(
-            df, config, header, dataset_features, validation_df=validation_df, process_id=process_id
+            df,
+            config,
+            header,
+            dataset_features,
+            validation_df=validation_df,
+            process_id=process_id,
+            silent=silent,
         )
 
     elif enableGBM == True:
@@ -224,6 +237,7 @@ def fit(
             dataset_features,
             validation_df=validation_df,
             process_id=process_id,
+            silent=silent,
         )
         # classification = True
 
@@ -235,12 +249,19 @@ def fit(
             dataset_features,
             validation_df=validation_df,
             process_id=process_id,
+            silent=silent,
         )
         # classification = False
 
     elif enableRandomForest == True:
         trees = randomforest.apply(
-            df, config, header, dataset_features, validation_df=validation_df, process_id=process_id
+            df,
+            config,
+            header,
+            dataset_features,
+            validation_df=validation_df,
+            process_id=process_id,
+            silent=silent,
         )
     else:  # regular decision tree building
         root = 1
@@ -264,22 +285,23 @@ def fit(
             main_process_id=process_id,
         )
 
-    logger.info("-------------------------")
-    logger.info(f"finished in {time.time() - begin} seconds")
+    if silent is False:
+        logger.info("-------------------------")
+        logger.info(f"finished in {time.time() - begin} seconds")
 
     obj = {"trees": trees, "alphas": alphas, "config": config, "nan_values": nan_values}
 
     # -----------------------------------------
 
     # train set accuracy
     df = base_df.copy()
-    evaluate(obj, df, task="train")
+    trainset_evaluation = evaluate(obj, df, task="train", silent=silent)
+    obj["evaluation"] = {"train": trainset_evaluation}
 
     # validation set accuracy
     if isinstance(validation_df, pd.DataFrame):
-        evaluate(obj, validation_df, task="validation")
-
-    # -----------------------------------------
+        validationset_evaluation = evaluate(obj, validation_df, task="validation", silent=silent)
+        obj["evaluation"]["validation"] = validationset_evaluation
 
     return obj
 
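Per the hunk above, the fitted model now carries its evaluation results under an `evaluation` key, so callers can read them without re-running `evaluate`. Continuing the sketch from earlier (the inner structure of each metrics dict comes from `cb_eval.evaluate` and is not shown in this diff, so treat it as opaque):

```python
# training metrics are attached to the returned model
train_metrics = model["evaluation"]["train"]

# the "validation" key exists only when a validation_df was passed to fit()
val_metrics = model["evaluation"].get("validation")
print(train_metrics, val_metrics)
```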
@@ -455,31 +477,38 @@ def restoreTree(module_name) -> Any:
     return functions.restoreTree(module_name)
 
 
-def feature_importance(rules: Union[str, list]) -> pd.DataFrame:
+def feature_importance(rules: Union[str, list], silent: bool = False) -> pd.DataFrame:
     """
     Show the feature importance values of a built model
     Args:
-        rules (str or list): e.g. decision_rules = "outputs/rules/rules.py"
+        rules (str or list): e.g. decision_rules = "outputs/rules/rules.py"
            or this could be retrieved from built model as shown below.
 
-            decision_rules = []
-            for tree in model["trees"]:
-                rule = .__dict__["__spec__"].origin
-                decision_rules.append(rule)
+            ```python
+            decision_rules = []
+            for tree in model["trees"]:
+                rule = tree.__dict__["__spec__"].origin
+                decision_rules.append(rule)
+            ```
+        silent (bool): set this to True if you do not want to see
+            any informative logs.
     Returns:
        feature importance (pd.DataFrame)
     """
 
     if not isinstance(rules, list):
         rules = [rules]
-    logger.info(f"rules: {rules}")
+
+    if silent is False:
+        logger.info(f"rules: {rules}")
 
     # -----------------------------
 
     dfs = []
 
     for rule in rules:
-        logger.info("Decision rule: {rule}")
+        if silent is False:
+            logger.info(f"Decision rule: {rule}")
 
         with open(rule, "r", encoding="UTF-8") as file:
             lines = file.readlines()
@@ -564,17 +593,23 @@ def feature_importance(rules: Union[str, list]) -> pd.DataFrame:
 
 
 def evaluate(
-    model: dict, df: pd.DataFrame, target_label: str = "Decision", task: str = "test"
-) -> None:
+    model: dict,
+    df: pd.DataFrame,
+    target_label: str = "Decision",
+    task: str = "test",
+    silent: bool = False,
+) -> dict:
     """
     Evaluate the performance of a built model on a data set
     Args:
         model (dict): built model which is the output of fit function
         df (pandas data frame): data frame you would like to evaluate
         target_label (str): target label
         task (string): set this to train, validation or test
+        silent (bool): set this to True if you do not want to see
+            any informative logs
     Returns:
-        None
+        evaluation results (dict)
     """
 
     # --------------------------
@@ -598,4 +633,4 @@ def evaluate(
     df["Decision"] = df["Decision"].astype(str)
     df["Prediction"] = df["Prediction"].astype(str)
 
-    cb_eval.evaluate(df, task=task)
+    return cb_eval.evaluate(df, task=task, silent=silent)
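Taken together, the standalone helpers accept the same flag and `evaluate` now returns its metrics instead of only logging them. A hedged usage sketch, continuing the example above (the shape of the returned metrics dict is defined by `cb_eval.evaluate`, not by this diff; the rules list follows the `feature_importance` docstring):

```python
# evaluate now returns a dict rather than None
metrics = chef.evaluate(model, df, task="test", silent=True)

# feature_importance also honors silent; rule module paths come from the built trees
decision_rules = [tree.__dict__["__spec__"].origin for tree in model["trees"]]
importance_df = chef.feature_importance(decision_rules, silent=True)
print(metrics)
print(importance_df)
```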