Exam 3 Solutions Data Visualization and Preprocessing

Contents

Open In Colab

Exam 3 Solutions: Data Visualization and Preprocessing

import numpy as np
import pandas as pa
import seaborn as sns
import matplotlib.pyplot as plt

# Load the World Cup player statistics straight from the course's GitHub repository.
df = pa.read_csv(
    'https://raw.githubusercontent.com/nurfnick/Data_Viz/main/worldCupPlayer.csv'
)
# Preview the first rows to confirm the columns were parsed.
df.head()
Unnamed: 0 Rank player club age Apps Mins Goals Assists Yel Red SpG PS AerialsWon MotM Rating nationality
0 0 1 Hakim Ziyech Ajax 25 34 3043 9 15 4 0 4.9 75.4 0.2 9 8.21 Morocco
1 2 2 Alireza Jahanbakhsh AZ Alkmaar 24 33 2840 21 12 3 0 4.3 73.4 0.7 14 8.20 Iran
2 4 3 Hirving Lozano PSV Eindhoven 22 29 2350 17 8 4 2 3.4 75.3 0.6 8 7.90 Mexico
3 6 11 Lasse Schone Ajax 32 27(3) 2350 10 3 6 0 1.9 87.0 1.9 2 7.45 Denmark
4 10 25 Santiago Arias PSV Eindhoven 26 30 2664 3 6 5 0 1.5 82.3 1 2 7.25 Colombia

COYS! (Come On You Spurs)

Rooster

# Every row whose club is Tottenham, selected with a boolean mask.
df.loc[df['club'] == 'Tottenham']
Unnamed: 0 Rank player club age Apps Mins Goals Assists Yel Red SpG PS AerialsWon MotM Rating nationality
414 795 5 Harry Kane Tottenham 24 35(2) 3083 30 2 5 0 5 71.2 1.6 9 7.60 England
415 796 2 Harry Kane Tottenham 24 7 597 7 2 0 0 4.6 78.6 1.6 2 8.34 England
422 806 11 Christian Eriksen Tottenham 26 37 3226 10 10 0 0 2.6 82.4 0.5 4 7.40 Denmark
423 808 95 Christian Eriksen Tottenham 26 6 535 2 1 0 0 3.3 83.1 0.5 0 7.08 Denmark
430 820 22 Jan Vertonghen Tottenham 31 36 3205 0 0 4 0 0.7 86.3 3.3 3 7.26 Belgium
431 822 231 Jan Vertonghen Tottenham 31 6 540 0 1 1 1 0.3 86.5 2 0 6.75 Belgium
446 852 44 Dele Alli Tottenham 22 34(2) 2972 9 10 7 0 1.9 77.2 0.8 1 7.09 England
447 854 13 Dele Alli Tottenham 22 5 427 2 4 1 0 1.6 80.7 0.6 1 7.73 England
455 870 57 Kieran Trippier Tottenham 27 21(3) 1914 0 5 1 0 0.3 81.4 1.5 0 7.04 England
464 888 75 Eric Dier Tottenham 24 32(2) 2827 0 2 4 0 0.9 86.3 2.4 0 6.96 England
465 890 179 Eric Dier Tottenham 24 7 614 0 1 1 0 0.4 80.9 1.6 0 6.86 England
468 896 78 Davinson Sanchez Tottenham 21 29(2) 2535 0 0 1 1 0.3 89.4 2.4 0 6.94 Colombia
469 898 236 Davinson Sanchez Tottenham 21 8 720 0 0 0 0 0.4 89.7 1.3 0 6.74 Colombia
475 910 94 Mousa Dembele Tottenham 30 21(7) 1886 0 0 6 0 0.4 92.2 0.3 0 6.90 Belgium
476 912 268 Mousa Dembele Tottenham 30 3(3) 328 0 0 2 0 - 92.7 0.2 0 6.66 Belgium
495 950 171 Hugo Lloris Tottenham 31 36 3240 0 0 1 0 - 75.2 0.3 1 6.74 France
496 952 100 Hugo Lloris Tottenham 31 7 630 0 0 0 0 - 60.2 0.4 0 7.07 France
558 1072 134 Toby Alderweireld Tottenham 29 4 294 0 1 0 0 0.5 81.3 1.3 0 7.00 Belgium
# Histogram of the Goals column via the pandas plotting wrapper, using 25 bins.
ax = df['Goals'].plot(kind='hist', bins=25, title='Histogram of Goals')
ax.set_xlabel('Goals')
plt.show()
../../_images/e58744708bb6e05ac83ba28ab5a2127629bcd4be6312def65d409bc1ca44339a.png
# Same distribution drawn with seaborn; no bin count is given, so seaborn picks one.
ax = sns.histplot(df, x='Goals')
ax.set(title='Histogram of Goals')
[Text(0.5, 1.0, 'Histogram of Goals')]
../../_images/9d78868aeb7bb6db6bcea998e34b0e6e66cbf2d9f7fd9cb4612253ecb8a5d7cf.png
# Overlay the Goals and Assists distributions; partial transparency keeps both visible.
scoring = df[['Goals', 'Assists']]
ax = scoring.plot(kind='hist', bins=25, alpha=0.7)
ax.set_title('Goals and Assists Histogram')
plt.show()
../../_images/1c2899aca2febcf14499d03ad11d3e30be655a1400931e242fb7dec1bb4dd87c.png
# Highest single-row goal tally for each nationality, one bar per nationality.
max_goals = df.groupby('nationality')['Goals'].max().reset_index()
ax = max_goals.plot.bar(x='nationality', legend=False)
ax.set(title='Most Goals Scored by Nationality', ylabel='Max Goals')
plt.show()
../../_images/4c98d28ceffac452c6acb3016d9ef475eae1753f3bc3a3065f4200ecf5441167.png
# Box plot of yellow cards (Yel) grouped by nationality; tick labels are
# rotated 45 degrees.
df.boxplot(
    column='Yel',
    by='nationality',
    rot=45,
)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py:1376: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
  X = np.atleast_1d(X.T if isinstance(X, np.ndarray) else np.asarray(X))
<matplotlib.axes._subplots.AxesSubplot at 0x7ff1c9c30090>
../../_images/30469735fe35085b2d4222c89df7a609b53b786fcc734f921787396bdabfc573.png
# Preview the boolean mask that marks Tottenham rows.
df['club'] == 'Tottenham'

# Store the mask as a column so seaborn can colour the points by it.
df['Tottenham'] = df['club'].eq('Tottenham')
sns.scatterplot(x='Goals', y='Assists', hue='Tottenham', alpha=0.7, data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7ff1c9584610>
../../_images/fa00e9002fa4813d405dc9516a69ab66ac6e0bc3bef3f3845135eb715625da53.png
# Tall, narrow figure so each player gets a horizontal bar.
# NOTE: this changes the global default figure size for later plots too.
plt.rcParams["figure.figsize"] = (5, 15)

# Stacked horizontal bars: goals in blue, assists in red, one bar per French row.
french_rows = df.loc[df['nationality'] == 'France']
ax = french_rows.plot(
    kind='barh',
    x='player',
    y=['Goals', 'Assists'],
    stacked=True,
    color=['blue', 'red'],
)
ax.set_title('Goals and Assists of the French National Team')
plt.show()
../../_images/02091e000a078668da03cb380b5b03fb6090c4fc011e415539ff484e633a4a2c.png
# Large square figure so every cell of the correlation matrix is readable.
# NOTE: this changes the global default figure size for later plots too.
plt.rcParams["figure.figsize"] = (15,15)

# Correlation is only defined for numeric data, but df also holds text columns
# (e.g. player, club, nationality). Older pandas silently dropped those columns
# inside corr(); pandas >= 2.0 raises instead. Selecting the numeric and boolean
# columns explicitly gives the same matrix on both old and new versions.
numeric_df = df.select_dtypes(include=['number', 'bool'])

# vmin=-1 anchors the colour scale at the lowest possible correlation.
ax = sns.heatmap(numeric_df.corr(), vmin = -1)
ax.set(title = 'Heatmap of the World Cup Players')
plt.show()
../../_images/b73e11cfcdb048057b312bd4c06b433beddc0355e824691513cc09cef2553f63.png