Skip to content

Commit 95338e0

Browse files
committed
Fix pandas chained assignment warnings
1 parent b6d54af commit 95338e0

1 file changed

Lines changed: 20 additions & 13 deletions

File tree

Diabetes Prediction [END 2 END]/diabetes_pipeline/data_preprocessing.py

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,39 +2,46 @@
22

33
import pandas as pd
44
import numpy as np
5+
from pathlib import Path
56
from sklearn.model_selection import train_test_split
67
from sklearn.preprocessing import StandardScaler
78

8-
def load_and_preprocess(csv_path='dataset/kaggle_diabetes.csv', test_size=0.2, random_state=0):
9+
BASE_DIR = Path(__file__).resolve().parent
10+
11+
def load_and_preprocess(
12+
csv_path=BASE_DIR / "dataset" / "kaggle_diabetes.csv",
13+
test_size=0.2,
14+
random_state=0
15+
):
916
# Load dataset
1017
df = pd.read_csv(csv_path)
11-
12-
# Rename column
18+
19+
# Rename column (IMPORTANT: this defines feature names forever)
1320
df = df.rename(columns={'DiabetesPedigreeFunction': 'DPF'})
14-
15-
# Replace 0s with NaN
21+
22+
# Replace invalid zeros
1623
cols_with_zero = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI']
1724
df[cols_with_zero] = df[cols_with_zero].replace(0, np.nan)
18-
25+
1926
# Fill NaNs
2027
df['Glucose'] = df['Glucose'].fillna(df['Glucose'].mean())
2128
df['BloodPressure'] = df['BloodPressure'].fillna(df['BloodPressure'].mean())
2229
df['SkinThickness'] = df['SkinThickness'].fillna(df['SkinThickness'].median())
2330
df['Insulin'] = df['Insulin'].fillna(df['Insulin'].median())
2431
df['BMI'] = df['BMI'].fillna(df['BMI'].median())
25-
26-
# Features & Target
32+
33+
# Features & target
2734
X = df.drop(columns='Outcome')
2835
y = df['Outcome']
29-
30-
# Train/Test Split
36+
37+
# Split
3138
X_train, X_test, y_train, y_test = train_test_split(
3239
X, y, test_size=test_size, random_state=random_state
3340
)
34-
35-
# Feature Scaling
41+
42+
# Scaling
3643
scaler = StandardScaler()
3744
X_train_scaled = scaler.fit_transform(X_train)
3845
X_test_scaled = scaler.transform(X_test)
39-
46+
4047
return X_train_scaled, X_test_scaled, y_train, y_test, scaler

0 commit comments

Comments
 (0)