"""Fit linear and Lasso regression models to a car-price CSV and plot the fit.

The script loads the dataset, prints a quick summary, integer-encodes the three
categorical columns, splits the rows into training and test sets, and then, for
each model, prints the R^2 score and shows an actual-vs-predicted scatter plot
for both the training and the test split.
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics

# Location of the input CSV (a Colab-style absolute path); edit to run elsewhere.
DATA_PATH = '/content/car data.csv'

# Integer code assigned to each known category, per categorical column.
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
    'Transmission': {'Manual': 0, 'Automatic': 1},
}


def _evaluate_and_plot(model, features, target):
    """Score a fitted model on one data split and plot actual vs predicted.

    Args:
        model: A fitted estimator exposing ``predict``.
        features: Feature rows to predict on.
        target: True target values aligned with ``features``.

    Returns:
        A ``(predictions, score)`` tuple, where ``score`` is the value returned
        by ``metrics.r2_score(target, predictions)``.
    """
    predictions = model.predict(features)

    # NOTE: the printed label says "Error", but r2_score is a goodness-of-fit
    # score (higher is better). The label is kept unchanged so the script's
    # output stays the same.
    score = metrics.r2_score(target, predictions)
    print("R squared Error : ", score)

    plt.scatter(target, predictions)
    plt.xlabel("Actual Price")
    plt.ylabel("Predicted Price")
    plt.title(" Actual Prices vs Predicted Prices")
    plt.show()

    return predictions, score


# --- Data collection and processing ---

# loading the data from csv file to pandas dataframe
car_dataset = pd.read_csv(DATA_PATH)

# The inspection steps below were bare notebook expressions; outside a notebook
# they display nothing, so they are printed explicitly.

# inspecting the first 5 rows of the dataframe
print(car_dataset.head())

# checking the number of rows and columns
print(car_dataset.shape)

# getting some information about the dataset (info() prints by itself)
car_dataset.info()

# checking the number of missing values
print(car_dataset.isnull().sum())

# checking the distribution of categorical data
print(car_dataset.Fuel_Type.value_counts())
print(car_dataset.Seller_Type.value_counts())
print(car_dataset.Transmission.value_counts())

# --- Encoding the categorical data ---

# Series.map is used instead of DataFrame.replace(..., inplace=True): replacing
# strings with integers via replace relies on silent dtype downcasting, which
# recent pandas versions deprecate.
for column, codes in CATEGORY_CODES.items():
    encoded = car_dataset[column].map(codes)
    # map() turns any value without a code into NaN; fail here with a clear
    # message rather than later inside model fitting.
    if encoded.isnull().any():
        raise ValueError(
            f"Column {column!r} contains values with no code in CATEGORY_CODES"
        )
    car_dataset[column] = encoded

print(car_dataset.head())

# --- Splitting the data and target ---

# Car_Name is dropped from the features; Selling_Price is the target.
X = car_dataset.drop(['Car_Name', 'Selling_Price'], axis=1)
Y = car_dataset['Selling_Price']

print(X)
print(Y)

# --- Splitting training and test data ---

# 10% of the rows are held out for testing; random_state fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=2
)

# --- Model training: 1. Linear Regression ---

# loading the linear regression model
lin_reg_model = LinearRegression()
lin_reg_model.fit(X_train, Y_train)

# evaluation on the training data
training_data_prediction, error_score = _evaluate_and_plot(
    lin_reg_model, X_train, Y_train
)

# evaluation on the test data
test_data_prediction, error_score = _evaluate_and_plot(
    lin_reg_model, X_test, Y_test
)

# --- Model training: 2. Lasso Regression ---

# loading the Lasso regression model (default settings)
lass_reg_model = Lasso()
lass_reg_model.fit(X_train, Y_train)

# evaluation on the training data
training_data_prediction, error_score = _evaluate_and_plot(
    lass_reg_model, X_train, Y_train
)

# evaluation on the test data
test_data_prediction, error_score = _evaluate_and_plot(
    lass_reg_model, X_test, Y_test
)
No comments:
Post a Comment