Exam 1 Solutions#
import requests
import pandas as pa
from bs4 import BeautifulSoup
r = requests.get('https://en.wikipedia.org/wiki/List of mountains by elevation')
html_contents = r.text
html_soup = BeautifulSoup(html_contents,"lxml")
tables = html_soup.find_all('table',class_="wikitable")
df = pa.read_html(str(tables))[0]
df.head()
| Mountain | Metres | Feet | Range | Location and Notes | |
|---|---|---|---|---|---|
| 0 | Mount Everest | 8848 | 29029 | Himalayas | Nepal/China |
| 1 | K2 | 8611 | 28251 | Karakoram | Pakistan/China |
| 2 | Kangchenjunga | 8586 | 28169 | Himalayas | Nepal/India |
| 3 | Lhotse | 8516 | 27940 | Himalayas | Nepal – Climbers ascend Lhotse Face in climbin... |
| 4 | Makalu | 8485 | 27838 | Himalayas | Nepal |
df.query('Range == "Himalayas"')
| Mountain | Metres | Feet | Range | Location and Notes | |
|---|---|---|---|---|---|
| 0 | Mount Everest | 8848 | 29029 | Himalayas | Nepal/China |
| 2 | Kangchenjunga | 8586 | 28169 | Himalayas | Nepal/India |
| 3 | Lhotse | 8516 | 27940 | Himalayas | Nepal – Climbers ascend Lhotse Face in climbin... |
| 4 | Makalu | 8485 | 27838 | Himalayas | Nepal |
| 5 | Cho Oyu | 8188 | 26864 | Himalayas | Nepal – Considered "easiest" eight-thousander |
| 6 | Dhaulagiri | 8167 | 26795 | Himalayas | Nepal – Presumed world's highest from 1808-1838 |
| 7 | Manaslu | 8163 | 26781 | Himalayas | Nepal |
| 8 | Nanga Parbat | 8126 | 26660 | Himalayas | Pakistan |
| 9 | Annapurna | 8091 | 26545 | Himalayas | Nepal – First eight-thousander to be climbed (... |
| 13 | Shishapangma | 8027 | 26335 | Himalayas | China |
df.groupby('Range')['Range'].count().plot(kind= 'bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7f651ae03850>
df.Feet.plot(kind='box')
<matplotlib.axes._subplots.AxesSubplot at 0x7f651c3f0ad0>
df.Feet.describe()
count 14.000000
mean 27178.000000
std 879.489537
min 26335.000000
25% 26518.000000
50% 26788.000000
75% 27914.500000
max 29029.000000
Name: Feet, dtype: float64
df.groupby('Range').agg(['mean','max','std'])
| Metres | Feet | |||||
|---|---|---|---|---|---|---|
| mean | max | std | mean | max | std | |
| Range | ||||||
| Himalayas | 8319.70 | 8848 | 270.100413 | 27295.6 | 29029 | 886.239521 |
| Karakoram | 8194.25 | 8611 | 278.456909 | 26884.0 | 28251 | 913.363382 |
from google.colab import auth
auth.authenticate_user()
print('Authenticated')
Authenticated
%%bigquery --project pic-math
SELECT
COUNT(*) as total_rows
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
| total_rows | |
|---|---|
| 0 | 198792903 |
%%bigquery --project pic-math
SELECT payment_type, AVG(trip_total) as avg_trip_total, COUNT(*) as total_num_of_trips
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
GROUP BY payment_type
ORDER BY avg_trip_total DESC
| payment_type | avg_trip_total | total_num_of_trips | |
|---|---|---|---|
| 0 | Prepaid | 22.377662 | 1805 |
| 1 | Credit Card | 20.524892 | 80421932 |
| 2 | Mobile | 20.339910 | 698557 |
| 3 | Prcard | 20.320682 | 955795 |
| 4 | Split | 18.788039 | 3442 |
| 5 | Unknown | 17.649893 | 932066 |
| 6 | Way2ride | 16.639366 | 142 |
| 7 | No Charge | 15.836177 | 817699 |
| 8 | Dispute | 15.496588 | 83309 |
| 9 | Cash | 12.265548 | 114841282 |
| 10 | Pcard | 10.116931 | 36874 |
%%bigquery --project pic-math
SELECT COUNT(*) as num_trips_over_average_time
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` as s
WHERE s.trip_seconds > (SELECT AVG(trip_seconds) as avg_trip_seconds
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
WHERE trip_seconds >0)
| num_trips_over_average_time | |
|---|---|
| 0 | 60750508 |