Pipelines
General Info
Useful Snippets

1.0.1 Create a pipeline containing sub-pipelines

Example:

# Define and instantiate a full_pipeline

from sklearn.pipeline import Pipeline

# import the transformers that you'll use in your example:
from sklearn.impute        import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# used to process columns separately in mini-pipelines that get combined into a single full pipeline
from sklearn.compose       import ColumnTransformer


# ---------------------------------------------------------------
# Numeric mini-pipeline
# ---------------------------------------------------------------
# Names of the numeric columns handled by this mini-pipeline.
num_columns = ['one', 'two', 'three']

# Steps run in the order listed: median imputation first, then
# standard scaling of the imputed values.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

# ---------------------------------------------------------------
# Categorical mini-pipeline
# ---------------------------------------------------------------
# Names of the categorical columns handled by this mini-pipeline.
cat_columns = ['four']

# Single step: one-hot encode using OneHotEncoder's default settings.
cat_pipeline = Pipeline(steps=[
    ('onehotencoder', OneHotEncoder()),
])

# ---------------------------------------------------------------
# Full pipeline
# ---------------------------------------------------------------
# Each entry is a (name, mini-pipeline, list_of_columns) tuple: the
# mini-pipeline is applied only to the columns listed alongside it.
full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_columns),
    ("cat", cat_pipeline, cat_columns),
])