1.0.1 Create a pipeline containing sub-pipelines
Example:
# Define and instantiate a full_pipeline
from sklearn.pipeline import Pipeline
# import the transformers that you'll use in your example:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
# Used to process columns separately in mini-pipelines that are then combined into a single full pipeline
from sklearn.compose import ColumnTransformer
###############
# Numeric pipeline
# Columns that are routed through the numeric mini-pipeline.
num_columns = ["one", "two", "three"]
# Steps run in the listed order: fill missing values using the "median"
# strategy, then pass the result to StandardScaler.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("std_scaler", StandardScaler()),
    ]
)
#################
# Categorical pipeline
# Columns that are routed through the categorical mini-pipeline.
cat_columns = ["four"]
# Single-step pipeline: one-hot encode the categorical columns with the
# encoder's default settings.
cat_pipeline = Pipeline(
    steps=[
        ("onehotencoder", OneHotEncoder()),
    ]
)
################
# Full pipeline
# Each entry is a (name, transformer, columns) triple: the named mini-pipeline
# is applied only to the columns listed for it, and the ColumnTransformer
# combines the per-entry outputs into a single result.
full_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_columns),
        ("cat", cat_pipeline, cat_columns),
    ]
)