Skip to content

Commit b6d54af

Browse files
committed
Update train.py and predict.py with new preprocessing and prediction logic
1 parent: 77d454b, commit: b6d54af

2 files changed

Lines changed: 59 additions & 37 deletions

File tree

predict.py — Lines changed: 44 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,45 @@
1+
import argparse
1 2
import joblib
2-
import numpy as np
3-
from config import MODEL_PATH, SCALER_PATH
4-
5-
class DiabetesPredictor:
6-
def __init__(self):
7-
self.model = joblib.load(MODEL_PATH)
8-
self.scaler = joblib.load(SCALER_PATH)
9-
10-
def predict(self, features: list) -> int:
11-
"""
12-
features order:
13-
[Pregnancies, Glucose, BloodPressure, SkinThickness,
14-
Insulin, BMI, DPF, Age]
15-
"""
16-
features = np.array(features).reshape(1, -1)
17-
features = self.scaler.transform(features)
18-
return int(self.model.predict(features)[0])
19-
20-
21-
if __name__ == "__main__":
22-
predictor = DiabetesPredictor()
23-
24-
sample_input = [2, 81, 72, 15, 76, 30.1, 0.547, 25]
25-
result = predictor.predict(sample_input)
26-
27-
if result == 1:
28-
print("Oops! You have diabetes.")
29-
else:
30-
print("Great! You don't have diabetes.")
3+
import pandas as pd
4+
5+
MODEL_PATH = "model/diabetes_model.pkl"
6+
SCALER_PATH = "model/scaler.pkl"
7+
8+
parser = argparse.ArgumentParser()
9+
parser.add_argument("--pregnancies", type=int, required=True)
10+
parser.add_argument("--glucose", type=float, required=True)
11+
parser.add_argument("--bp", type=float, required=True)
12+
parser.add_argument("--skin", type=float, required=True)
13+
parser.add_argument("--insulin", type=float, required=True)
14+
parser.add_argument("--bmi", type=float, required=True)
15+
parser.add_argument("--dpf", type=float, required=True)
16+
parser.add_argument("--age", type=int, required=True)
17+
18+
args = parser.parse_args()
19+
20+
# Load model & scaler
21+
model = joblib.load(MODEL_PATH)
22+
scaler = joblib.load(SCALER_PATH)
23+
24+
# IMPORTANT: feature names must match training
25+
input_data = pd.DataFrame([{
26+
"Pregnancies": args.pregnancies,
27+
"Glucose": args.glucose,
28+
"BloodPressure": args.bp,
29+
"SkinThickness": args.skin,
30+
"Insulin": args.insulin,
31+
"BMI": args.bmi,
32+
"DPF": args.dpf,
33+
"Age": args.age
34+
}])
35+
36+
# Scale & predict
37+
input_scaled = scaler.transform(input_data)
38+
prediction = model.predict(input_scaled)[0]
39+
40+
if prediction == 1:
41+
print("⚠️ Diabetes detected")
42+
else:
43+
print("✅ No diabetes detected")
44+
45+
train.py — Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,29 @@
1-
# diabetes_pipeline/train.py
2-
1+
import logging
3 2
import joblib
4 3
from sklearn.ensemble import RandomForestClassifier
5 4
from data_preprocessing import load_and_preprocess
6-
from config import MODEL_DIR, MODEL_PATH, SCALER_PATH
5+
from config import MODEL_PATH, SCALER_PATH, MODEL_DIR
7 6

8-
# Ensure model directory exists
9-
MODEL_DIR.mkdir(exist_ok=True)
7+
# Logging setup
8+
logging.basicConfig(
9+
filename="logs/training.log",
10+
level=logging.INFO,
11+
format="%(asctime)s - %(levelname)s - %(message)s"
12+
)
13+
14+
logging.info("Training started")
10 15

11-
# Load and preprocess data
16+
# Load data
12 17
X_train, X_test, y_train, y_test, scaler = load_and_preprocess()
13 18

14 19
# Train model
15 20
classifier = RandomForestClassifier(n_estimators=20, random_state=0)
16 21
classifier.fit(X_train, y_train)
17 22

18-
# Save model and scaler
23+
# Save artifacts
24+
MODEL_DIR.mkdir(exist_ok=True)
19 25
joblib.dump(classifier, MODEL_PATH)
20 26
joblib.dump(scaler, SCALER_PATH)
21 27

22-
print("Model and scaler saved successfully.")
28+
logging.info("Model and scaler saved successfully")
29+

0 commit comments

Comments
 (0)