serengil
diff --git a/‎.gitignore
Lines changed: 3 additions & 1 deletion b/‎.gitignore
Lines changed: 3 additions & 1 deletion
diff --git a/‎chefboost/commons/daemon.py
Lines changed: 33 additions & 0 deletions b/‎chefboost/commons/daemon.py
Lines changed: 33 additions & 0 deletions
diff --git a/‎chefboost/commons/functions.py
Lines changed: 72 additions & 13 deletions b/‎chefboost/commons/functions.py
Lines changed: 72 additions & 13 deletions
diff --git a/‎chefboost/training/Preprocess.py
Lines changed: 15 additions & 1 deletion b/‎chefboost/training/Preprocess.py
Lines changed: 15 additions & 1 deletion
@@ -3,6 +3,7 @@ __pycache__/*
 commons/__pycache__/*
 training/__pycache__/*
 tuning/__pycache__/*
+tests/__pycache__/*
 build/
 dist/
 Pipfile
@@ -18,4 +19,5 @@ chefboost/tuning/__pycache__/*
 .DS_Store
 chefboost/.DS_Store
 tests/.DS_Store
-.pytest_cache
+.pytest_cache
+*.pyc
@@ -0,0 +1,33 @@
+import multiprocessing
+import multiprocessing.pool
+
+class NoDaemonProcess(multiprocessing.Process):
+    """
+    NoDaemonProcess class for recursive parallel runs
+    """
+    def _get_daemon(self):
+        # make 'daemon' attribute always return False
+        return False
+
+    def _set_daemon(self, value):
+        pass
+
+    daemon = property(_get_daemon, _set_daemon)
+
+
+class NoDaemonContext(type(multiprocessing.get_context())):
+    """
+    NoDaemonContext class for recursive parallel runs
+    """
+    # pylint: disable=too-few-public-methods
+    Process = NoDaemonProcess
+
+
+class CustomPool(multiprocessing.pool.Pool):
+    """
+    CustomPool class for recursive parallel runs
+    """
+    # pylint: disable=too-few-public-methods, abstract-method, super-with-arguments
+    def __init__(self, *args, **kwargs):
+        kwargs["context"] = NoDaemonContext()
+        super(CustomPool, self).__init__(*args, **kwargs)
@@ -1,9 +1,12 @@
 import pathlib
 import os
+import sys
 from os import path
+from types import ModuleType
 import multiprocessing
-from typing import Optional
+from typing import Optional, Union
 import numpy as np
+import pandas as pd
 from chefboost import Chefboost as cb
 from chefboost.commons.logger import Logger
 from chefboost.commons.module import load_module
@@ -13,7 +16,15 @@
 logger = Logger(module="chefboost/commons/functions.py")
 
 
-def bulk_prediction(df, model):
+def bulk_prediction(df: pd.DataFrame, model: dict) -> None:
+    """
+    Perform a bulk prediction on given dataframe
+    Args:
+        df (pd.DataFrame): input data frame
+        model (dict): built model
+    Returns:
+        None
+    """
     predictions = []
     for _, instance in df.iterrows():
         features = instance.values[0:-1]
@@ -23,17 +34,35 @@ def bulk_prediction(df, model):
     df["Prediction"] = predictions
 
 
-def restoreTree(module_name):
+def restoreTree(module_name: str) -> ModuleType:
+    """
+    Restores a built tree
+    """
     return load_module(module_name)
 
 
-def softmax(w):
+def softmax(w: list) -> np.ndarray:
+    """
+    Softmax function
+    Args:
+        w (list): probabilities
+    Returns:
+        result (numpy.ndarray): softmax of inputs
+    """
     e = np.exp(np.array(w, dtype=np.float32))
     dist = e / np.sum(e)
     return dist
 
 
-def sign(x):
+def sign(x: Union[int, float]) -> int:
+    """
+    Sign function
+    Args:
+        x (int or float): input
+    Returns:
+        result (int) 1 for positive inputs, -1 for negative
+            inputs, 0 for neutral input
+    """
     if x > 0:
         return 1
     elif x < 0:
@@ -42,7 +71,14 @@ def sign(x):
         return 0
 
 
-def formatRule(root):
+def formatRule(root: int) -> str:
+    """
+    Format a rule in the output file (tree)
+    Args:
+        root (int): degree of current rule
+    Returns:
+        formatted rule (str)
+    """
     resp = ""
 
     for _ in range(0, root):
@@ -51,20 +87,37 @@ def formatRule(root):
     return resp
 
 
-def storeRule(file, content):
+def storeRule(file: str, content: str) -> None:
+    """
+    Store a custom rule
+    Args:
+        file (str): target file
+        content (str): content to store
+    Returns:
+        None
+    """
     with open(file, "a+", encoding="UTF-8") as f:
         f.writelines(content)
         f.writelines("\n")
 
 
-def createFile(file, content):
+def createFile(file: str, content: str) -> None:
+    """
+    Create a file with given content
+    Args:
+        file (str): target file
+        content (str): content to store
+    Returns:
+        None
+    """
     with open(file, "w", encoding="UTF-8") as f:
         f.write(content)
 
 
-def initializeFolders():
-    import sys
-
+def initializeFolders() -> None:
+    """
+    Initialize required folders
+    """
     sys.path.append("..")
     pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)
     pathlib.Path("outputs/data").mkdir(parents=True, exist_ok=True)
@@ -97,8 +150,14 @@ def initializeFolders():
     # ------------------------------------
 
 
-def initializeParams(config: Optional[dict] = None):
-
+def initializeParams(config: Optional[dict] = None) -> dict:
+    """
+    Arrange a chefboost configuration
+    Args:
+        config (dict): initial configuration
+    Returns:
+        config (dict): final configuration
+    """
     if config == None:
         config = {}
 
 
@@ -1,12 +1,26 @@
 import math
 import numpy as np
+import pandas as pd
 from chefboost.training import Training
 from chefboost.commons.logger import Logger
 
 logger = Logger(module="chefboost/training/Preprocess.py")
 
 
-def processContinuousFeatures(algorithm, df, column_name, entropy, config):
+def processContinuousFeatures(
+    algorithm: str, df: pd.DataFrame, column_name: str, entropy: float, config: dict
+) -> pd.DataFrame:
+    """
+    Find the best split point for numeric features
+    Args:
+        df (pd.DataFrame): (sub) training dataframe
+        column_name (str): current column to process
+        entropy (float): calculated entropy
+        config (dict): training configuration
+    Returns:
+        df (pd.DataFrame): dataframe with numeric columns updated
+            to nominal (e.g. age >40 or <=40 instead of continuous age)
+    """
     # if True:
     if df[column_name].nunique() <= 20:
         unique_values = sorted(df[column_name].unique())