# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
# Load the two raw Pyber inputs from CSV: the ride table and the city table.
ride_data = pd.read_csv("Pyber/raw_data/ride_data.csv")
city_data = pd.read_csv("Pyber/raw_data/city_data.csv")
# Preview the first rows of the city table (shown as the notebook cell output).
city_data.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
city | driver_count | type | |
---|---|---|---|
0 | Kelseyland | 63 | Urban |
1 | Nguyenbury | 8 | Urban |
2 | East Douglas | 12 | Urban |
3 | West Dawnfurt | 34 | Urban |
4 | Rodriguezburgh | 52 | Urban |
# Preview the first rows of the ride table (shown as the notebook cell output).
ride_data.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
city | date | fare | ride_id | |
---|---|---|---|---|
0 | Sarabury | 2016-01-16 13:49:27 | 38.35 | 5403689035038 |
1 | South Roy | 2016-01-02 18:42:34 | 17.49 | 4036272335942 |
2 | Wiseborough | 2016-01-21 17:35:29 | 44.18 | 3645042422587 |
3 | Spencertown | 2016-07-31 14:53:22 | 6.87 | 2242596575892 |
4 | Nguyenbury | 2016-07-09 04:42:44 | 6.28 | 1543057793673 |
# Combine both inputs into the single table the rest of the analysis works from.
# A left join on 'city' keeps every row of ride_data and attaches the matching
# city columns (driver_count, type) to it.
final_data = ride_data.merge(city_data, on='city', how='left')
final_data.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
city | date | fare | ride_id | driver_count | type | |
---|---|---|---|---|---|---|
0 | Sarabury | 2016-01-16 13:49:27 | 38.35 | 5403689035038 | 46 | Urban |
1 | South Roy | 2016-01-02 18:42:34 | 17.49 | 4036272335942 | 35 | Urban |
2 | Wiseborough | 2016-01-21 17:35:29 | 44.18 | 3645042422587 | 55 | Urban |
3 | Spencertown | 2016-07-31 14:53:22 | 6.87 | 2242596575892 | 68 | Urban |
4 | Nguyenbury | 2016-07-09 04:42:44 | 6.28 | 1543057793673 | 8 | Urban |
# Creates the pie plot for the % of total rides by city type.
# The slice values are the number of final_data rows per 'type'.
#
# The slices are placed in one explicit Urban/Suburban/Rural order so that the
# labels, colors and exploded slice always refer to the same city type.
# value_counts() orders by frequency while unique() orders by first appearance,
# so pairing the two could attach a label to the wrong slice.
type_order = ['Urban', 'Suburban', 'Rural']
rides_by_type = final_data['type'].value_counts().reindex(type_order, fill_value=0)
type_plot = plt.pie(
    rides_by_type,
    explode=[0.10, 0, 0],
    labels=type_order,
    autopct='%.2f',
    colors=['LightCoral', 'LightSkyBlue', 'Gold'],
    shadow=True,
    startangle=240,
)
plt.title('% of Total Rides by City Type')
plt.savefig('rides_city_type.png')
plt.show()
# Creates pie plot of % of total fare by city type.
# fare_sum is the sum of every fare in final_data; sum_list holds each city
# type's share of that total as a percentage, in Urban/Suburban/Rural order.
#
# The labels use the same explicit order as the values. Taking them from
# final_data['type'].unique() (order of first appearance) could mislabel the
# slices whenever that order differs from Urban/Suburban/Rural.
type_order = ['Urban', 'Suburban', 'Rural']
fare_sum = final_data['fare'].sum()
fares_by_type = final_data.groupby('type')['fare'].sum().reindex(type_order, fill_value=0)
sum_list = (fares_by_type / fare_sum * 100).tolist()
fare_type_plot = plt.pie(
    sum_list,
    explode=[0.05, 0, 0],
    labels=type_order,
    autopct='%.2f',
    colors=['LightCoral', 'LightSkyBlue', 'Gold'],
    shadow=True,
    startangle=240,
)
plt.title('% of Total Fares by City Type')
plt.savefig('fares_city_type.png')
plt.show()
# Creates pie plot of % of total drivers by city type.
#
# Driver counts are summed from city_data, the table that lists each city's
# driver_count. final_data repeats a city's driver_count on every one of its
# ride rows, so summing there would weight each city by its number of rides.
# The shares are divided by driver_sum (the total number of drivers), not by
# the fare total.
type_order = ['Urban', 'Suburban', 'Rural']
driver_sum = city_data['driver_count'].sum()
drivers_by_type = (
    city_data.groupby('type')['driver_count'].sum().reindex(type_order, fill_value=0)
)
sum_list = (drivers_by_type / driver_sum * 100).tolist()
# Labels use the same explicit order as the values so every slice, color and
# the exploded wedge refer to the same city type.
driver_type_plot = plt.pie(
    sum_list,
    explode=[0.20, 0, 0],
    labels=type_order,
    autopct='%.2f',
    colors=['LightCoral', 'LightSkyBlue', 'Gold'],
    shadow=True,
    startangle=210,
)
# This chart used to be stored under fare_type_plot; keep that name bound to
# the same object for any code that still reads it.
fare_type_plot = driver_type_plot
plt.title('% of Total Drivers by City Type')
plt.savefig('drivers_city_type.png')
plt.show()
final_data.head()
<style>
.dataframe thead tr:only-child th {
text-align: right;
}
</style>
.dataframe thead th {
text-align: left;
}
.dataframe tbody tr th {
vertical-align: top;
}
city | date | fare | ride_id | driver_count | type | |
---|---|---|---|---|---|---|
0 | Sarabury | 2016-01-16 13:49:27 | 38.35 | 5403689035038 | 46 | Urban |
1 | South Roy | 2016-01-02 18:42:34 | 17.49 | 4036272335942 | 35 | Urban |
2 | Wiseborough | 2016-01-21 17:35:29 | 44.18 | 3645042422587 | 55 | Urban |
3 | Spencertown | 2016-07-31 14:53:22 | 6.87 | 2242596575892 | 68 | Urban |
4 | Nguyenbury | 2016-07-09 04:42:44 | 6.28 | 1543057793673 | 8 | Urban |
# Creates the bubble chart for the ride sharing data.
# One bubble per city: x = total number of rides, y = average fare,
# bubble size = driver count. Each city type is drawn as its own scatter series.
def _per_city_stats(type_df):
    """Return (ride counts, average fares, driver counts) per city of type_df.

    All three results come from the same groupby('city'), so they have one
    entry per city in the same order. That keeps a bubble's x, y and size
    tied to the same city.
    """
    by_city = type_df.groupby('city')
    ride_counts = by_city.size()
    avg_fares = by_city['fare'].mean().tolist()
    # driver_count is a city-level column repeated on each ride row, so one
    # value per city is taken -- assumes it is constant within a city; TODO confirm.
    driver_counts = by_city['driver_count'].first()
    return ride_counts, avg_fares, driver_counts


# Split the merged table by city type.
urban_data = final_data[final_data['type'] == 'Urban']
rural_data = final_data[final_data['type'] == 'Rural']
sub_data = final_data[final_data['type'] == 'Suburban']

# rides_city_* = rides per city, *_temp = average fare per city,
# *_size = driver count per city (used as the bubble size).
rides_city_urban, urban_temp, urban_size = _per_city_stats(urban_data)
rides_city_rural, rural_temp, rural_size = _per_city_stats(rural_data)
rides_city_sub, sub_temp, sub_size = _per_city_stats(sub_data)

# Select the style before anything is drawn so the new figure picks it up.
# Newer matplotlib releases renamed 'seaborn-dark' to 'seaborn-v0_8-dark';
# use whichever name this installation provides.
bubble_style = 'seaborn-dark'
if bubble_style not in plt.style.available:
    bubble_style = 'seaborn-v0_8-dark'
plt.style.use(bubble_style)

# Each city type plot is created separately; the label on each series feeds
# the legend, so the legend entries cannot drift from the plotting order.
urban_plot = plt.scatter(rides_city_urban, urban_temp, s=urban_size,
                         c='lightcoral', label='Urban')
rural_plot = plt.scatter(rides_city_rural, rural_temp, s=rural_size,
                         c='gold', label='Rural')
sub_plot = plt.scatter(rides_city_sub, sub_temp, s=sub_size,
                       c='lightskyblue', label='Suburban')
plt.title('Pyber Ride Sharing Data (2016)')
plt.xlabel('Total Number of Rides (Per City)')
plt.ylabel('Average Fare ($)')
plt.legend(title='City Types')
plt.savefig('ride_sharing_data.png')
plt.show()