Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions custom_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,64 @@
# Your code
#==============Your Name=====================
#Dont remove the following snippet and follow the same


#==================================================shrihari====================================================================
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import OrdinalEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline


class ST_DiamondFeatureEngineer(BaseEstimator, TransformerMixin):
def __init__(self):
self.cut_order = ['Fair', 'Good', 'Very Good', 'Premium', 'Ideal']
self.color_order = ['J', 'I', 'H', 'G', 'F', 'E', 'D']
self.clarity_order = ['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']
self.ordinal_encoder = OrdinalEncoder(categories=[self.cut_order, self.color_order, self.clarity_order])
self.numerical_features = []
self.categorical_cols = ['cut_grade', 'color_grade', 'clarity_grade']

def fit(self, X, y=None):

df = X.copy()

self.ordinal_encoder.fit(df[self.categorical_cols])

self.numerical_features = df.select_dtypes(include=np.number).columns.drop(self.categorical_cols, errors='ignore')
return self

def transform(self, X, y=None):
df = X.copy()

if 'price_per_carat' in df.columns:
df = df.drop(columns=['price_per_carat'])

df[self.categorical_cols] = self.ordinal_encoder.transform(df[self.categorical_cols])

for col in self.numerical_features:
if col in df.columns:
Q1 = df[col].quantile(0.25)
Q3 = df[col].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
df[col] = df[col].clip(lower_bound, upper_bound)

df['density'] = df['carat_weight'] / (df['volume_mm3'] + 0.0001)
df['depth_per_width'] = df['depth_percentage'] / (df['width_mm'] + 0.0001)
df['carat_x_clarity'] = df['carat_weight'] * (df['clarity_grade'] + 1)
df['carat_x_color'] = df['carat_weight'] * (df['color_grade'] + 1)
df['carat_x_cut'] = df['carat_weight'] * (df['cut_grade'] + 1)

return df


submission_pipeline = Pipeline(steps=[
('feature_engineer', ST_DiamondFeatureEngineer()),
('model', RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1))
])
#==============================shrihari=====================================================================================