Exam 1 Solutions

Exam 1 Solutions#

import requests
import pandas as pa
from bs4 import BeautifulSoup


r = requests.get('https://en.wikipedia.org/wiki/List of mountains by elevation')
html_contents = r.text
html_soup = BeautifulSoup(html_contents,"lxml")
tables = html_soup.find_all('table',class_="wikitable")

df = pa.read_html(str(tables))[0]

df.head()

	Mountain	Metres	Feet	Range	Location and Notes
0	Mount Everest	8848	29029	Himalayas	Nepal/China
1	K2	8611	28251	Karakoram	Pakistan/China
2	Kangchenjunga	8586	28169	Himalayas	Nepal/India
3	Lhotse	8516	27940	Himalayas	Nepal – Climbers ascend Lhotse Face in climbin...
4	Makalu	8485	27838	Himalayas	Nepal

df.query('Range == "Himalayas"')

	Mountain	Metres	Feet	Range	Location and Notes
0	Mount Everest	8848	29029	Himalayas	Nepal/China
2	Kangchenjunga	8586	28169	Himalayas	Nepal/India
3	Lhotse	8516	27940	Himalayas	Nepal – Climbers ascend Lhotse Face in climbin...
4	Makalu	8485	27838	Himalayas	Nepal
5	Cho Oyu	8188	26864	Himalayas	Nepal – Considered "easiest" eight-thousander
6	Dhaulagiri	8167	26795	Himalayas	Nepal – Presumed world's highest from 1808-1838
7	Manaslu	8163	26781	Himalayas	Nepal
8	Nanga Parbat	8126	26660	Himalayas	Pakistan
9	Annapurna	8091	26545	Himalayas	Nepal – First eight-thousander to be climbed (...
13	Shishapangma	8027	26335	Himalayas	China

df.groupby('Range')['Range'].count().plot(kind= 'bar')

<matplotlib.axes._subplots.AxesSubplot at 0x7f651ae03850>

../../_images/b61af8b28a0f629d89bd58e75a81401adcbf34345674e8817b7196de3b537dba.png

df.Feet.plot(kind='box')

<matplotlib.axes._subplots.AxesSubplot at 0x7f651c3f0ad0>

../../_images/367410b305dbdfb13f33b4bfb06e193d70857d5db10a3361ae1685dc13f1268c.png

df.Feet.describe()

count       14.000000
mean     27178.000000
std        879.489537
min      26335.000000
25%      26518.000000
50%      26788.000000
75%      27914.500000
max      29029.000000
Name: Feet, dtype: float64

df.groupby('Range').agg(['mean','max','std'])

	Metres			Feet
	mean	max	std	mean	max	std
Range
Himalayas	8319.70	8848	270.100413	27295.6	29029	886.239521
Karakoram	8194.25	8611	278.456909	26884.0	28251	913.363382

from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated

%%bigquery --project pic-math
SELECT 
  COUNT(*) as total_rows
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`

	total_rows
0	198792903

%%bigquery --project pic-math
SELECT payment_type, AVG(trip_total) as avg_trip_total, COUNT(*) as total_num_of_trips
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
GROUP BY payment_type
ORDER BY avg_trip_total DESC

	payment_type	avg_trip_total	total_num_of_trips
0	Prepaid	22.377662	1805
1	Credit Card	20.524892	80421932
2	Mobile	20.339910	698557
3	Prcard	20.320682	955795
4	Split	18.788039	3442
5	Unknown	17.649893	932066
6	Way2ride	16.639366	142
7	No Charge	15.836177	817699
8	Dispute	15.496588	83309
9	Cash	12.265548	114841282
10	Pcard	10.116931	36874

%%bigquery --project pic-math

SELECT COUNT(*) as num_trips_over_average_time
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` as s
WHERE s.trip_seconds > (SELECT AVG(trip_seconds) as avg_trip_seconds
                        FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
                        WHERE trip_seconds >0)

	num_trips_over_average_time
0	60750508