-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHouse Price Prediction1.py
60 lines (45 loc) · 1.78 KB
/
House Price Prediction1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib
# End-to-end script: load the house-price CSV, preprocess it, fit a linear
# regression on a 90/10 train/test split, report the test RMSE, and persist
# the fitted model with joblib.

# Step 1: Load the dataset
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another machine.
file = r'C:\Users\ASUS\Downloads\house_prices.csv'
df = pd.read_csv(file)

# Display the first few rows and basic information
print("First few rows of the dataset:")
print(df.head())
print("\nBasic information about the dataset:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line.
df.info()

# Summary statistics
print("\nSummary statistics:")
print(df.describe())

# Step 2: Data Preprocessing
# Drop the stray unnamed columns if they are present; errors='ignore' keeps
# the script working when the CSV does not contain them.
df.drop(columns=['Unnamed: 6', 'Unnamed: 7'], inplace=True, errors='ignore')

# One-hot encode the categorical 'location' column
df = pd.get_dummies(df, columns=['location'])

# Step 3: Feature Engineering
# Derived feature: bedrooms and bathrooms combined into one count
df['total_rooms'] = df['bedrooms'] + df['bathrooms']

# Step 4: Model Building and Evaluation
# Separate features and target variable ('id' is excluded from the features)
X = df.drop(columns=['id', 'price'])
y = df['price']

# Split data into training and testing sets (10% held out, fixed seed for
# reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Check the shape of X and y after splitting
print(f"\nShape of X_train: {X_train.shape}, Shape of X_test: {X_test.shape}")
print(f"Shape of y_train: {y_train.shape}, Shape of y_test: {y_test.shape}")

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
# yields the same RMSE on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"\nRoot Mean Squared Error: {rmse}")

# Save the trained model
joblib.dump(model, 'house_price_model.pkl')
print("\nTrained model saved as 'house_price_model.pkl'")