"""Exploratory data analysis of the Diwali sales CSV (Google Colab export).

Loads the dataset from a mounted Google Drive, drops unused columns and null
rows, and draws count / total-amount bar charts by gender, age group, state,
marital status, occupation, product category and product ID.
"""

# import python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # visualizing data
# %matplotlib inline
import seaborn as sns

from google.colab import drive

# Mount Google Drive BEFORE reading: the CSV path below lives under
# /content/drive and does not exist until the mount has completed.
drive.mount('/content/drive')

# import csv file
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Diwali Sales Data.csv', encoding='unicode_escape')

df.shape

df.head()

df.info()

# drop unrelated/blank columns
df.drop(['Status', 'unnamed1'], axis=1, inplace=True)

# check for null values
pd.isnull(df).sum()

# drop null values
df.dropna(inplace=True)

# change data type
df['Amount'] = df['Amount'].astype('int')

df['Amount'].dtypes

df.columns

# rename column (demonstration only): rename() returns a new DataFrame and the
# result is not assigned, so df keeps 'Marital_Status', which later plots use.
df.rename(columns={'Marital_Status': 'Shaadi'})

# describe() method returns description of the data in the DataFrame
# (i.e. count, mean, std, etc)
df.describe()

# use describe() for specific columns
df[['Age', 'Orders', 'Amount']].describe()

"""# Exploratory Data Analysis

### Gender
"""

# NOTE: every chart below opens its own figure with plt.figure(); without it,
# consecutive seaborn calls draw onto the same current axes and overlap when
# the script is executed as a single unit.

# plotting a bar chart for Gender and it's count
plt.figure()
ax = sns.countplot(x='Gender', data=df)

for bars in ax.containers:
    ax.bar_label(bars)

# plotting a bar chart for gender vs total amount
sales_gen = df.groupby(['Gender'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False)

plt.figure()
sns.barplot(x='Gender', y='Amount', data=sales_gen)

"""*From above graphs we can see that most of the buyers are females and even the purchasing power of females are greater than men*

### Age
"""

plt.figure()
ax = sns.countplot(data=df, x='Age Group', hue='Gender')

for bars in ax.containers:
    ax.bar_label(bars)

# Total Amount vs Age Group
sales_age = df.groupby(['Age Group'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False)

plt.figure()
sns.barplot(x='Age Group', y='Amount', data=sales_age)

"""*From above graphs we can see that most of the buyers are of age group between 26-35 yrs female*

### State
"""

# total number of orders from top 10 states
sales_state = df.groupby(['State'], as_index=False)['Orders'].sum().sort_values(by='Orders', ascending=False).head(10)

sns.set(rc={'figure.figsize': (15, 5)})
plt.figure()
sns.barplot(data=sales_state, x='State', y='Orders')

# total amount/sales from top 10 states
sales_state = df.groupby(['State'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False).head(10)

sns.set(rc={'figure.figsize': (15, 5)})
plt.figure()
sns.barplot(data=sales_state, x='State', y='Amount')

"""*From above graphs we can see that most of the orders & total sales/amount are from Uttar Pradesh, Maharashtra and Karnataka respectively*

### Marital Status
"""

# The figure size must be configured before the figure is created; setting it
# after the plot call would leave this chart at the previous size.
sns.set(rc={'figure.figsize': (7, 5)})
plt.figure()
ax = sns.countplot(data=df, x='Marital_Status')

for bars in ax.containers:
    ax.bar_label(bars)

sales_marital = df.groupby(['Marital_Status', 'Gender'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False)

sns.set(rc={'figure.figsize': (6, 5)})
plt.figure()
sns.barplot(data=sales_marital, x='Marital_Status', y='Amount', hue='Gender')

"""*From above graphs we can see that most of the buyers are married (women) and they have high purchasing power*

### Occupation
"""

sns.set(rc={'figure.figsize': (20, 5)})
plt.figure()
ax = sns.countplot(data=df, x='Occupation')

for bars in ax.containers:
    ax.bar_label(bars)

sales_occupation = df.groupby(['Occupation'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False)

sns.set(rc={'figure.figsize': (20, 5)})
plt.figure()
sns.barplot(data=sales_occupation, x='Occupation', y='Amount')

"""*From above graphs we can see that most of the buyers are working in IT, Healthcare and Aviation sector*

### Product Category
"""

sns.set(rc={'figure.figsize': (20, 5)})
plt.figure()
ax = sns.countplot(data=df, x='Product_Category')

for bars in ax.containers:
    ax.bar_label(bars)

sales_category = df.groupby(['Product_Category'], as_index=False)['Amount'].sum().sort_values(by='Amount', ascending=False).head(10)

sns.set(rc={'figure.figsize': (20, 5)})
plt.figure()
sns.barplot(data=sales_category, x='Product_Category', y='Amount')

"""*From above graphs we can see that most of the sold products are from Food, Clothing and Electronics category*"""

top_products = df.groupby(['Product_ID'], as_index=False)['Orders'].sum().sort_values(by='Orders', ascending=False).head(10)

sns.set(rc={'figure.figsize': (20, 5)})
plt.figure()
sns.barplot(data=top_products, x='Product_ID', y='Orders')

# top 10 most sold products (same thing as above)
fig1, ax1 = plt.subplots(figsize=(12, 7))
# nlargest() already returns the values in descending order, so no extra sort
# is needed; ax=ax1 binds the bars to the axes created on the line above.
df.groupby('Product_ID')['Orders'].sum().nlargest(10).plot(kind='bar', ax=ax1)

"""## Conclusion:

###

*Married women age group 26-35 yrs from UP, Maharastra and Karnataka working in IT, Healthcare and Aviation are more likely to buy products from Food, Clothing and Electronics category*
"""
Link to the Google Colaboratory notebook: Click Here