import time
import pandas as pd
import numpy as np
# Maps each supported city name (lowercase) to the CSV file that is read for it.
CITY_DATA = {
    "chicago": "chicago.csv",
    "new york city": "new_york_city.csv",
    "washington": "washington.csv",
}
# Accepted (lowercase) answers for each of the three prompts.
_VALID_CITIES = ('chicago', 'new york city', 'washington')
_VALID_MONTHS = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
_VALID_DAYS = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
               'saturday', 'sunday', 'all')


def _prompt_choice(prompt, choices, retry_prompt=None):
    """Ask until the user enters one of *choices*; return it stripped and lowercased."""
    answer = input(prompt).strip().lower()
    while answer not in choices:
        print("Invalid Input\n")
        answer = input(retry_prompt or prompt).strip().lower()
    return answer


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Answers are compared case-insensitively and with surrounding whitespace
    removed; each question is repeated until a valid answer is given.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print("Hello! Let's explore some US bikeshare data!")

    # city: chicago, new york city or washington
    city = _prompt_choice('Enter the city\n', _VALID_CITIES)

    # month: january ... june, or all
    month = _prompt_choice(
        "Enter the month or all to apply no month filter: ",
        _VALID_MONTHS,
        retry_prompt='Enter the month name or all to apply no month filter: ',
    )

    # day of week: monday ... sunday, or all
    day = _prompt_choice('Enter the day name or all to apply no day filter: ',
                         _VALID_DAYS)

    print('-'*40)
    return city, month, day
# NOTE(review): this re-declares the same CITY_DATA mapping that already
# appears near the top of the file; the second assignment is redundant.
CITY_DATA = {
    "chicago": "chicago.csv",
    "new york city": "new_york_city.csv",
    "washington": "washington.csv",
}
def load_data(city, month, day):
    """
    Reads the CSV registered for `city` and narrows it to the requested month/day.

    Args:
        (str) city - key into CITY_DATA selecting the file to read
        (str) month - lowercase month name (january..june), or "all" for no month filter
        (str) day - day-of-week name, or "all" for no day filter
    Returns:
        df - Pandas DataFrame with 'Start Time' parsed as datetime, the added
             'month' and 'day_of_week' columns, and only the rows that match
             the month and day filters
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps once and derive both helper columns from them.
    start_times = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = start_times
    trips['month'] = start_times.dt.month
    trips['day_of_week'] = start_times.dt.day_name()

    if month != 'all':
        # Position in this tuple (1-based) is the calendar month number.
        month_names = ('january', 'february', 'march', 'april', 'may', 'june')
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names, so title-case the filter value.
        trips = trips[trips['day_of_week'] == day.title()]

    return trips
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common start month (as a number), day-of-week name and
    hour, all derived from the 'Start Time' column.

    Args:
        df - Pandas DataFrame whose 'Start Time' column holds datetime values

    The DataFrame is only read: no helper columns are written back, so the
    caller's data is left untouched. An empty DataFrame produces a short
    notice instead of raising.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty series has no element 0, so bail out early.
        print('No trips match the selected filters.')
    else:
        start_times = df['Start Time']

        # display the most common month
        popular_month = start_times.dt.month.mode()[0]
        print('Most Popular Start month:', popular_month)

        # display the most common day of week
        popular_day_of_week = start_times.dt.day_name().mode()[0]
        print('Most Popular Start day of week:', popular_day_of_week)

        # display the most common start hour
        popular_hour = start_times.dt.hour.mode()[0]
        print('Most Popular Start Hour:', popular_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common value of 'Start Station', of 'End Station', and of
    the "start, end" pairing of the two.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns

    An empty DataFrame produces a short notice instead of raising.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty series has no element 0, so bail out early.
        print('No trips match the selected filters.')
    else:
        # display most commonly used start station
        common_start = df['Start Station'].mode()[0]
        print("The most commonly used start station: ", common_start)

        # display most commonly used end station
        common_end = df['End Station'].mode()[0]
        print("The most commonly used end station: ", common_end)

        # display most frequent combination of start station and end station trip
        trips = df['Start Station'] + ', ' + df['End Station']
        common_combination = trips.mode()[0]
        print("The most frequent combination of start station and end station trip: ",
              common_combination)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # display total travel time
    print("Total trip duration: ", durations.sum())

    # display mean travel time
    print("Average trip duration: ", durations.mean())

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def user_stats(df, city):
    """Displays statistics on bikeshare users.

    Prints the counts per 'User Type' and, when the columns exist, the counts
    per 'Gender' and the earliest, most recent and most common 'Birth Year'.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
        (str) city - name of the city; used only in the notice printed when
             one or both optional columns are absent

    Availability is decided by the columns actually present rather than by
    the city name, so a data set lacking them reports that instead of raising
    KeyError.
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print(df['User Type'].value_counts())

    missing = [column for column in ('Gender', 'Birth Year')
               if column not in df.columns]
    if missing:
        print("%s data does not have %s" % (city.title(), ' and '.join(missing)))

    # Display counts of gender
    if 'Gender' in df.columns:
        print(df['Gender'].value_counts())

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            # min/max would be NaN and mode()[0] would raise on no data.
            print('No birth year data available for the selected filters.')
        else:
            print('Earliest year of birth: ', birth_years.min())
            print('Most Recent year of birth: ', birth_years.max())
            print('Most Common Year of birth: ', birth_years.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def _ask_to_show_rows():
    """Ask whether to show rows, repeating until the answer is 'yes' or 'no'."""
    print('Do you want to see some rows of the data?')
    answer = input("Enter yes or no: ").strip().lower()
    while answer not in ('yes', 'no'):
        print("Enter a valid answer!")
        answer = input("Enter yes or no: ").strip().lower()
    return answer


def display_rows(city):
    """Interactively prints the city's CSV rows, five at a time.

    The file registered for `city` in CITY_DATA is read again, so the rows
    shown are not restricted by any month/day filter. Before each batch the
    user is asked whether to continue; every answer is validated, and paging
    stops when the user answers "no" or when all rows have been shown.

    Args:
        (str) city - key into CITY_DATA selecting the file to read
    """
    start_time = time.time()
    dfwhole = pd.read_csv(CITY_DATA[city])

    # Show every column instead of letting pandas elide the middle ones.
    pd.set_option('display.max_columns', 200)

    total_rows = len(dfwhole)
    position = 0
    while position < total_rows and _ask_to_show_rows() == 'yes':
        print(dfwhole[position:position + 5])
        position += 5

    if position >= total_rows:
        # Avoid printing empty frames once the data is exhausted.
        print('No more rows to display.')

    # NOTE: the elapsed time below includes the time spent waiting for input.
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
def main():
    """Runs the interactive analysis, repeating for as long as the user answers "yes".

    Each round collects the filters, loads the matching data, prints the
    time, station, trip-duration and user statistics, and then offers to
    page through the raw rows.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)
        display_rows(city)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # Anything other than "yes" (ignoring case and padding) ends the session.
        if restart.strip().lower() != 'yes':
            break
# Start the interactive session only when run as a script, not when imported.
if __name__ == '__main__':
    main()