{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Untitled89.ipynb", "provenance": [], "authorship_tag": "ABX9TyOwfsfXLVpmt1w0/dNTUe89", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "# Exam 2 Solutions" ], "metadata": { "id": "a0Bq5t6e04aD" } },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "29FMyIlV03xS", "outputId": "8b1d5261-787c-453e-8a00-328fa2dd37ea" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Unnamed: 0 int64\n", "activity_day object\n", "workout_type object\n", "distance float64\n", "time int64\n", "calories int64\n", "total_steps float64\n", "avg_speed float64\n", "avg_cadence float64\n", "max_cadence float64\n", "avg_pace object\n", "max_pace object\n", "min_pace object\n", "avg_heart_rate float64\n", "max_heart_rate float64\n", "min_heart_rate int64\n", "vo2_max(%) int64\n", "aerobic(%) int64\n", "anaerobic(%) int64\n", "intensive(%) int64\n", "light(%) int64\n", "dtype: object" ] }, "metadata": {}, "execution_count": 2 } ], "source": [ "# NOTE: pandas is aliased as `pa` (not the usual `pd`) throughout this notebook.\n", "import pandas as pa\n", "\n", "# Load the activity log directly from GitHub (needs network access).\n", "df = pa.read_csv('https://raw.githubusercontent.com/nurfnick/Data_Viz/main/Activity_Dataset_V1.csv')\n", "\n", "# Inspect the inferred dtypes before any cleaning.\n", "df.dtypes" ] },
{ "cell_type": "code", "source": [ "# 'Unnamed: 0' is presumably a row index that was saved into the CSV; it carries\n", "# no information, so drop it. errors='ignore' keeps this cell re-runnable after\n", "# the column has already been removed (a plain drop would raise KeyError).\n", "df = df.drop(columns='Unnamed: 0', errors='ignore')\n", "\n", "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 270 }, "id": "YklX-L8H1CzF", "outputId": "7ae83f22-2e3a-4db9-9ec0-73759d7cdc4a" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " activity_day workout_type distance time calories total_steps \\\n", "0 2022-01-01 Freestyle 9.30 77 123 NaN \n", "1 2022-01-01 
Freestyle 3.44 96 55 NaN \n", "2 2022-01-01 Indoor Cycling 6.34 85 33 NaN \n", "3 2022-01-01 Walking 7.91 42 82 1571.0 \n", "4 2022-01-01 Open Water 8.99 36 131 NaN \n", "\n", " avg_speed avg_cadence max_cadence avg_pace max_pace min_pace \\\n", "0 18.88 168.54 138.30 NaN NaN NaN \n", "1 29.65 125.92 292.81 NaN NaN NaN \n", "2 17.85 81.93 323.69 NaN NaN NaN \n", "3 22.10 29.63 180.16 07:58 28:58 07:58 \n", "4 25.83 64.55 342.89 NaN NaN NaN \n", "\n", " avg_heart_rate max_heart_rate min_heart_rate vo2_max(%) aerobic(%) \\\n", "0 112.5 122.0 103 19 28 \n", "1 111.0 122.0 100 42 28 \n", "2 95.0 90.0 100 1 32 \n", "3 83.0 85.0 81 3 22 \n", "4 138.0 166.0 110 7 0 \n", "\n", " anaerobic(%) intensive(%) light(%) \n", "0 2 7 50 \n", "1 2 29 88 \n", "2 0 22 43 \n", "3 0 24 65 \n", "4 5 21 88 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
activity_dayworkout_typedistancetimecaloriestotal_stepsavg_speedavg_cadencemax_cadenceavg_pacemax_pacemin_paceavg_heart_ratemax_heart_ratemin_heart_ratevo2_max(%)aerobic(%)anaerobic(%)intensive(%)light(%)
02022-01-01Freestyle9.3077123NaN18.88168.54138.30NaNNaNNaN112.5122.010319282750
12022-01-01Freestyle3.449655NaN29.65125.92292.81NaNNaNNaN111.0122.0100422822988
22022-01-01Indoor Cycling6.348533NaN17.8581.93323.69NaNNaNNaN95.090.010013202243
32022-01-01Walking7.9142821571.022.1029.63180.1607:5828:5807:5883.085.08132202465
42022-01-01Open Water8.9936131NaN25.8364.55342.89NaNNaNNaN138.0166.01107052188
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 7 } ] },
{ "cell_type": "code", "source": [ "import re\n", "\n", "# Strip the literal '(%)' suffix so the percentage columns can be reached with\n", "# attribute access (df.aerobic, df.vo2_max, ...).\n", "df.columns = [re.sub(r'\\(%\\)', '', col) for col in df.columns]\n", "\n", "df.columns" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9dpBrU0sfkYi", "outputId": "eda4139b-d741-483b-aa8a-9c1f183a6c85" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['activity_day', 'workout_type', 'distance', 'time', 'calories',\n", " 'total_steps', 'avg_speed', 'avg_cadence', 'max_cadence', 'avg_pace',\n", " 'max_pace', 'min_pace', 'avg_heart_rate', 'max_heart_rate',\n", " 'min_heart_rate', 'vo2_max', 'aerobic', 'anaerobic', 'intensive',\n", " 'light'],\n", " dtype='object')" ] }, "metadata": {}, "execution_count": 16 } ] },
{ "cell_type": "code", "source": [ "# Parse the date strings so the .dt accessors (e.g. day_name) work later on.\n", "df.activity_day = pa.to_datetime(df.activity_day)" ], "metadata": { "id": "UzlCJimSflsK" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Rows without a step count are recorded as 0 steps.\n", "df.total_steps = df.total_steps.fillna(0)" ], "metadata": { "id": "w0hedZ5YhRjx" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "def pace_to_minutes(pace):\n", "    \"\"\"Convert an 'MM:SS' pace string to decimal minutes.\n", "\n", "    Missing values become 0.0. Values that are already numeric are returned\n", "    unchanged (as float) so that re-running this cell does not fail.\n", "    \"\"\"\n", "    if pa.isna(pace):\n", "        return 0.0\n", "    if isinstance(pace, str):\n", "        # Split on ':' instead of slicing fixed positions, so the widths of\n", "        # the minute and second fields do not matter.\n", "        minutes, seconds = pace.split(':')\n", "        return int(minutes) + int(seconds) / 60\n", "    return float(pace)\n", "\n", "df.avg_pace = df.avg_pace.apply(pace_to_minutes)" ], "metadata": { "id": "R-kBJzPRhqBf" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Calorie summary statistics for each workout type.\n", "df.groupby('workout_type').calories.agg(['mean', 'median', 'std', 'count'])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 426 }, "id": "Up4OaSUYh_Id", "outputId": "4b7bf082-fff3-4f0d-8425-0176da3f8dc3" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " mean median std count\n", "workout_type \n", "Cricket 307.548387 330.0 149.950609 93\n", "Freestyle 278.552083 294.0 163.703768 96\n", "Indoor Cycling 280.450000 306.0 156.633322 80\n", "Open Water 296.747253 328.0 
160.068499 91\n", "Outdoor Cycling 299.129412 301.0 158.731263 85\n", "Outdoor Running 301.469136 349.0 165.725080 81\n", "Pool Swimming 283.414894 300.0 157.576703 94\n", "Trail Run 267.966667 264.0 155.748533 90\n", "Treadmill 278.142857 269.5 146.963352 98\n", "Trekking 283.127660 280.5 151.034870 94\n", "Walking 276.040816 270.5 157.287570 98" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanmedianstdcount
workout_type
Cricket307.548387330.0149.95060993
Freestyle278.552083294.0163.70376896
Indoor Cycling280.450000306.0156.63332280
Open Water296.747253328.0160.06849991
Outdoor Cycling299.129412301.0158.73126385
Outdoor Running301.469136349.0165.72508081
Pool Swimming283.414894300.0157.57670394
Trail Run267.966667264.0155.74853390
Treadmill278.142857269.5146.96335298
Trekking283.127660280.5151.03487094
Walking276.040816270.5157.28757098
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 35 } ] },
{ "cell_type": "code", "source": [ "# aerobic holds whole percentage points (the head() output shows 28, 32, 22, 0),\n", "# so a 30% cut-off is written as 30. Comparing against 0.3 would flag every\n", "# non-zero row.\n", "df['high_aerobic'] = df.aerobic > 30" ], "metadata": { "id": "i-orOKdBjMgm" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# idxmax skips NaN and returns one row label even if the maximum is tied,\n", "# whereas a boolean mask followed by .item() raises in both situations.\n", "where = df.loc[df.max_cadence.idxmax(), 'workout_type']" ], "metadata": { "id": "FSpM-qWNjb92" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# activity_day was converted to datetime earlier, so the looked-up value is a\n", "# Timestamp and day_name() gives the weekday.\n", "when = df.loc[df.max_cadence.idxmax(), 'activity_day'].day_name()" ], "metadata": { "id": "qBssfOoAjpyD" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "print('The max cadence happened on {} in the {}'.format(when,where))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hmmcD_dUkXO8", "outputId": "263c6569-0fe4-46ed-ef49-72142a28ce81" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "The max cadence happened on Wednesday in the Pool Swimming\n" ] } ] } ] }