# -*- coding: utf-8 -*-
"""House price prediction on the Boston housing data with an XGBoost regressor.

The script loads the dataset, prints a few summaries of it, plots the
correlation heatmap of all columns, trains an ``XGBRegressor`` on an 80/20
train/test split, and prints the R squared score and the mean absolute error
for both the training and the test predictions.
"""

# Importing the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.datasets
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics

# Original source of the Boston data and its 13 feature names; only used when
# ``sklearn.datasets.load_boston`` is not available.
BOSTON_DATA_URL = "http://lib.stat.cmu.edu/datasets/boston"
BOSTON_FEATURE_NAMES = [
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
]


def _load_boston_dataset():
    """Return the Boston housing data as an object with ``data``,
    ``target`` and ``feature_names`` attributes.

    ``sklearn.datasets.load_boston`` is used when the installed scikit-learn
    still provides it.  It was removed in scikit-learn 1.2 (looking the name
    up raises ``ImportError``), so in that case the raw file is downloaded
    and reshaped following the recipe given in scikit-learn's removal notice.

    NOTE(review): scikit-learn removed this dataset because of an ethical
    problem with the ``B`` feature; consider switching to
    ``fetch_california_housing`` for anything beyond a tutorial.
    """
    try:
        return sklearn.datasets.load_boston()
    except (ImportError, AttributeError):
        # Local import: only needed on the fallback path.
        from sklearn.utils import Bunch

        # The raw file has a 22-line header; each record is then split over
        # two physical lines (11 values, followed by 2 features + the target).
        raw_df = pd.read_csv(
            BOSTON_DATA_URL, sep=r"\s+", skiprows=22, header=None
        )
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        return Bunch(
            data=data,
            target=target,
            feature_names=np.array(BOSTON_FEATURE_NAMES),
        )


# Importing the Boston House Price Dataset
house_price_dataset = _load_boston_dataset()

print(house_price_dataset)

# Loading the dataset to a Pandas DataFrame
house_price_dataframe = pd.DataFrame(
    house_price_dataset.data, columns=house_price_dataset.feature_names
)

# Print first 5 rows of our DataFrame
print(house_price_dataframe.head())

# Add the target (price) column to the DataFrame
house_price_dataframe['price'] = house_price_dataset.target

print(house_price_dataframe.head())

# Checking the number of rows and columns in the data frame
print(house_price_dataframe.shape)

# Check for missing values
print(house_price_dataframe.isnull().sum())

# Statistical measures of the dataset
print(house_price_dataframe.describe())

# Understanding the correlation between various features in the dataset
# 1. Positive Correlation
# 2. Negative Correlation
correlation = house_price_dataframe.corr()

# Constructing a heatmap to understand the correlation
plt.figure(figsize=(10, 10))
sns.heatmap(correlation, cbar=True, square=True, fmt='.1f', annot=True,
            annot_kws={'size': 8}, cmap='Blues')
# Show the heatmap on its own; otherwise the scatter plot further down would
# be drawn onto this same figure.
plt.show()

# Splitting the data and target
X = house_price_dataframe.drop(['price'], axis=1)
Y = house_price_dataframe['price']

print(X)
print(Y)

# Splitting the data into training data and test data
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2
)

print(X.shape, X_train.shape, X_test.shape)

# Model training: XGBoost Regressor
# Loading the model
model = XGBRegressor()

# Training the model with X_train
model.fit(X_train, Y_train)

# Evaluation: prediction on training data
training_data_prediction = model.predict(X_train)

print(training_data_prediction)

# R squared score (the printed label says "error", but ``r2_score`` is the
# coefficient of determination, so higher is better)
score_1 = metrics.r2_score(Y_train, training_data_prediction)

# Mean Absolute Error
score_2 = metrics.mean_absolute_error(Y_train, training_data_prediction)

print("R squared error : ", score_1)
print('Mean Absolute Error : ', score_2)

# Visualizing the actual prices and predicted prices (training data)
plt.figure()  # fresh figure so the scatter never lands on the heatmap
plt.scatter(Y_train, training_data_prediction)
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.title("Actual Price vs Preicted Price")
plt.show()

# Evaluation: prediction on test data
test_data_prediction = model.predict(X_test)

# R squared score
score_1 = metrics.r2_score(Y_test, test_data_prediction)

# Mean Absolute Error
score_2 = metrics.mean_absolute_error(Y_test, test_data_prediction)

print("R squared error : ", score_1)
print('Mean Absolute Error : ', score_2)
Monday, 6 November 2023
House Price Prediction
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment