Exam 2 Solutions

Open In Colab

Exam 2 Solutions

import pandas as pa

# Location of the raw activity CSV that this notebook analyses.
DATA_URL = 'https://raw.githubusercontent.com/nurfnick/Data_Viz/main/Activity_Dataset_V1.csv'
df = pa.read_csv(DATA_URL)

# Show the dtype pandas inferred for each column.
df.dtypes
Unnamed: 0          int64
activity_day       object
workout_type       object
distance          float64
time                int64
calories            int64
total_steps       float64
avg_speed         float64
avg_cadence       float64
max_cadence       float64
avg_pace           object
max_pace           object
min_pace           object
avg_heart_rate    float64
max_heart_rate    float64
min_heart_rate      int64
vo2_max(%)          int64
aerobic(%)          int64
anaerobic(%)        int64
intensive(%)        int64
light(%)            int64
dtype: object
# Drop the 'Unnamed: 0' column (presumably a leftover row index from the
# CSV export -- confirm against the data source).
df = df.drop(columns=['Unnamed: 0'])

# Preview the first rows of the cleaned frame.
df.head()
activity_day workout_type distance time calories total_steps avg_speed avg_cadence max_cadence avg_pace max_pace min_pace avg_heart_rate max_heart_rate min_heart_rate vo2_max(%) aerobic(%) anaerobic(%) intensive(%) light(%)
0 2022-01-01 Freestyle 9.30 77 123 NaN 18.88 168.54 138.30 NaN NaN NaN 112.5 122.0 103 19 28 2 7 50
1 2022-01-01 Freestyle 3.44 96 55 NaN 29.65 125.92 292.81 NaN NaN NaN 111.0 122.0 100 42 28 2 29 88
2 2022-01-01 Indoor Cycling 6.34 85 33 NaN 17.85 81.93 323.69 NaN NaN NaN 95.0 90.0 100 1 32 0 22 43
3 2022-01-01 Walking 7.91 42 82 1571.0 22.10 29.63 180.16 07:58 28:58 07:58 83.0 85.0 81 3 22 0 24 65
4 2022-01-01 Open Water 8.99 36 131 NaN 25.83 64.55 342.89 NaN NaN NaN 138.0 166.0 110 7 0 5 21 88
import re

# Remove every literal "(%)" from the column names so the percentage columns
# can be referenced by plain identifiers (e.g. df.aerobic).
percent_marker = re.compile(r'\(%\)')
newcol = [percent_marker.sub("", col) for col in df.columns]

df.columns = newcol

# Display the renamed columns.
df.columns
Index(['activity_day', 'workout_type', 'distance', 'time', 'calories',
       'total_steps', 'avg_speed', 'avg_cadence', 'max_cadence', 'avg_pace',
       'max_pace', 'min_pace', 'avg_heart_rate', 'max_heart_rate',
       'min_heart_rate', 'vo2_max', 'aerobic', 'anaerobic', 'intensive',
       'light'],
      dtype='object')
def _pace_to_minutes(pace):
    """Convert an 'MM:SS' pace string into minutes as a float."""
    whole_minutes = int(pace[0:2])
    seconds = int(pace[3:6])
    return whole_minutes + seconds / 60


# Parse the activity dates into datetime values.
df['activity_day'] = pa.to_datetime(df['activity_day'])
# Missing step counts are replaced with 0.
df['total_steps'] = df['total_steps'].fillna(0)
# Missing paces become '00:00' (i.e. 0.0 minutes) before the conversion.
df['avg_pace'] = df['avg_pace'].fillna('00:00').apply(_pace_to_minutes)
# Summary statistics of calories for each workout type.
df.groupby('workout_type')['calories'].agg(['mean', 'median', 'std', 'count'])
mean median std count
workout_type
Cricket 307.548387 330.0 149.950609 93
Freestyle 278.552083 294.0 163.703768 96
Indoor Cycling 280.450000 306.0 156.633322 80
Open Water 296.747253 328.0 160.068499 91
Outdoor Cycling 299.129412 301.0 158.731263 85
Outdoor Running 301.469136 349.0 165.725080 81
Pool Swimming 283.414894 300.0 157.576703 94
Trail Run 267.966667 264.0 155.748533 90
Treadmill 278.142857 269.5 146.963352 98
Trekking 283.127660 280.5 151.034870 94
Walking 276.040816 270.5 157.287570 98
# aerobic holds whole-number percentages (e.g. 28, 32), not fractions, so the
# "more than 30%" cut-off has to be 30. Comparing against 0.3 marked every
# row with a non-zero aerobic share as high.
df['high_aerobic'] = df.aerobic > 30
# Find the row with the largest max_cadence once. idxmax() skips NaN values
# and returns the first matching row label, so this neither depends on the
# position of missing values (as builtin max() does) nor fails when several
# rows tie for the maximum (as .item() does).
peak_row = df['max_cadence'].idxmax()
where = df.at[peak_row, 'workout_type']
when = df.at[peak_row, 'activity_day'].day_name()
print('The max cadence happened on {} in the {}'.format(when, where))
The max cadence happened on Wednesday in the Pool Swimming