!pip install --upgrade eazyml-counterfactual
!pip install gdown python-dotenv

import os
import pandas as pd
import eazyml as ez
from eazyml_counterfactual import (
        ez_cf_inference,
        ez_init        
)
import gdown

from dotenv import load_dotenv
load_dotenv()

True

# Initialize EazyML with the access key held in the EAZYML_ACCESS_KEY
# environment variable (the lookup yields None when the variable is unset).
ez_init(os.environ.get('EAZYML_ACCESS_KEY'))

{'success': True,
 'message': 'Initialized successfully. You may revoke your consent to sharing usage stats anytime. You have exclusive paid access.'}

# Fetch the Google Drive folder with this id using gdown.
# NOTE(review): the dataset paths defined below expect the files under a
# local 'data' directory — confirm the downloaded folder is saved under that name.
gdown.download_folder(id='1gWvCFW2cHqthUsPUQ0feOG4P41rpQwJC')

# Name of the column the model is asked to predict.
outcome = "price_range"

# Paths of the training and test workbooks inside the local 'data' directory.
train_file, test_file = (
    os.path.join('data', workbook)
    for workbook in (
        "Mobile Price Ternary - Train Data.xlsx",
        "Mobile Price Ternary - Test Data.xlsx",
    )
)

# Read both Excel workbooks into DataFrames (training first, then test).
train_df, test_df = map(pd.read_excel, (train_file, test_file))

# Preview the top rows of the training data.
ez.ez_display_df(train_df.head())

# Options for the model build: request a predictive model.
model_options = {"model_type": "predictive"}

# Train on the training DataFrame with `outcome` as the target column; the
# returned response dictionary is read by the steps that follow.
build_model_response = ez.ez_build_model(
    train_df,
    options=model_options,
    outcome=outcome,
)

# Show the feature-importance table stored under 'global_importance' in the build response.
ez.ez_display_df(build_model_response['global_importance'])

# Show the model-performance table stored under 'model_performance' in the build response.
ez.ez_display_df(build_model_response['model_performance'])

# Keep the model information returned by the build step; it is passed to
# both prediction and counterfactual inference.
model_info = build_model_response["model_info"]

# Reuse the test DataFrame that was already loaded from the Excel test file
# instead of parsing the same workbook a second time. Working on a copy leaves
# `test_df` untouched.
test_data = test_df.copy()

# Predict on the test data, requesting confidence scores and class probabilities.
predicted_resp = ez.ez_predict(
    test_data,
    model_info,
    options={"confidence_score": True, "class_probability": True},
)

# Stop here when prediction fails: the counterfactual steps need
# `predicted_df`, and carrying on would either hit an unrelated NameError or,
# in a re-run notebook, silently reuse a stale `predicted_df`.
if not predicted_resp['success']:
    print("Prediction failed")
    print(predicted_resp['message'])
    raise RuntimeError(f"EazyML prediction failed: {predicted_resp['message']}")

print("Prediction successful")
predicted_df = predicted_resp['pred_df']  # DataFrame of predictions returned by EazyML
ez.ez_display_df(predicted_df.head())  # Preview the first few predicted rows

Prediction successful

# Features handed to counterfactual inference.
selected_features = [
    'sc_w',
    'n_cores',
    'mobile_wt',
    'talk_time',
    'ram',
    'px_width',
    'px_height',
    'battery_power',
    'pc',
    'fc',
    'm_dep',
    'int_memory',
    'sc_h',
]

# Features that must stay fixed (none here); every other selected feature is
# treated as a variant, i.e. one that may be modified.
invariants = []
variants = list(filter(lambda name: name not in invariants, selected_features))

# Choose the record to analyse: row label 0 of the prediction output, kept as
# a one-row DataFrame by indexing with a list.
test_index_no = 0
test_data = predicted_df.loc[[test_index_no], :]

# Settings passed to counterfactual inference.
cf_options = {
    "variants": variants,       # features that may be modified
    "outcome_ordinality": "1",  # desired outcome
    "train_data": train_file,   # path of the training workbook
}

# Run counterfactual inference for the chosen record. The call yields a pair:
# a result summary and a table of the suggested feature transitions.
result, optimal_transition_df = ez_cf_inference(
    model_info=model_info,
    test_data=test_data,
    selected_features=selected_features,
    outcome=outcome,
    options=cf_options,
)

# Display the inference summary: whether an optimal transition was found, the
# actual and optimal outcomes, and the improvement in outcome probability.
ez.ez_display_json(result)

{   'success': True,
    'message': 'Optimal transition found',
    'summary': {   'Actual Outcome': '0',
                   'Optimal Outcome': '1',
                   'Improvement in Probability': 0.635}}

# Display the per-feature changes needed to reach the optimal outcome
# (actual value, optimal value, percentage change and absolute change).
ez.ez_display_df(optimal_transition_df)

	battery_power	blue	clock_speed	dual_sim	fc	four_g	int_memory	m_dep	mobile_wt	n_cores	pc	px_height	px_width	ram	sc_h	sc_w	talk_time	three_g	touch_screen	wifi	price_range
0	1745	0	2.900000	0	0	1	3	0.900000	105	3	0	426	1629	1308	5	0	2	1	1	0	1
1	535	1	0.500000	1	8	1	54	0.500000	145	8	15	710	939	2674	14	8	10	1	0	0	2
2	1577	0	0.500000	1	0	1	42	0.300000	197	5	4	1045	1737	2060	19	6	12	1	0	0	2
3	1702	1	2.300000	0	12	1	52	0.500000	145	2	15	1397	1491	2501	16	12	4	1	0	0	3
4	707	0	2.100000	1	2	0	25	0.800000	131	3	17	495	574	3838	9	2	7	1	0	1	3

	Variable Name	Importance
0	mobile_wt	0.010000
1	int_memory	0.010000
2	talk_time	0.010000
3	sc_w	0.010000
4	px_width	0.080000
5	px_height	0.090000
6	battery_power	0.130000
7	ram	0.610000

	Model	Kappa	Accuracy
0	Logistic Regression	0.95	0.97
1	Gradient Boosting Classifier	0.89	0.91
2	Boosted Decision Trees with InformationGain	0.86	0.89
3	Bagged Decision Trees with Information Gain	0.84	0.88
4	Naive Bayes	0.75	0.81
5	Random Forest with Information Gain	0.61	0.71

	battery_power	blue	clock_speed	dual_sim	fc	four_g	int_memory	m_dep	mobile_wt	n_cores	pc	px_height	px_width	ram	sc_h	sc_w	talk_time	three_g	touch_screen	price_range	Probability_price_range_0	Probability_price_range_1	Probability_price_range_2	Probability_price_range_3	Predicted price_range	Confidence Score
0	1646	0	2.500000	0	3	1	25	0.600000	200	2	5	211	1608	686	8	6	11	1	1	0	0.919936	0.080052	0.000013	0.000000	0	91%
1	1182	0	0.500000	0	7	1	8	0.500000	138	8	16	275	986	2563	19	17	19	1	0	2	0.000058	0.122860	0.870659	0.006424	2	87%
2	1972	0	2.900000	0	9	0	14	0.400000	196	7	18	293	952	1316	8	1	8	1	1	1	0.146630	0.839048	0.014322	0.000000	1	83%
3	989	1	2.000000	0	4	0	17	0.200000	166	3	19	256	1394	3892	18	7	19	1	1	3	0.000000	0.000001	0.012143	0.987857	3	98%
4	615	1	0.500000	1	7	0	58	0.500000	130	5	8	1021	1958	1906	14	5	5	1	0	1	0.002843	0.753895	0.243139	0.000123	1	75%

	Feature	Actual	Optimal	Percentage Change	Absolute Change
0	sc_w	6.000000	4.800000	-20.000000	-1.200000
1	n_cores	2.000000	3.000000	50.000000	1.000000
2	mobile_wt	200.000000	176.000000	-12.000000	-24.000000
3	talk_time	11.000000	13.200000	20.000000	2.200000
4	ram	686.000000	823.200000	20.000000	137.200000
5	px_width	1608.000000	1906.880000	18.600000	298.880000
6	px_height	211.000000	253.200000	20.000000	42.200000
7	battery_power	1646.000000	1944.880000	18.200000	298.880000
8	pc	5.000000	4.000000	-20.000000	-1.000000
9	fc	3.000000	2.400000	-20.000000	-0.600000
10	m_dep	0.600000	1.000000	66.700000	0.400000
11	int_memory	25.000000	30.000000	20.000000	5.000000
12	sc_h	8.000000	9.000000	12.500000	1.000000

EazyML Counterfactual Template¶

Define Imports¶

1. Initialize EazyML¶

2. Define Dataset Files and Outcome Variable¶

3. Dataset Information¶

Columns in the Dataset:¶

3.1 Display the Dataset¶

4. EazyML Modeling¶

4.1 Building model using the EazyML Modeling API¶

4.2 Feature Importance¶

4.3 Model Performance¶

4.4 Predict Using the Trained EazyML Model¶

5. EazyML Counterfactual Inference¶

5.1 Define Counterfactual Inference Configuration¶

5.2 Perform Counterfactual Inference¶

5.3 Display Results¶