Session Objectives¶

Introduction to Plotly
Scatter Plots
Line Charts
Bar Plot
Bubble Plot
Box Plot
Histograms
Distplots
Heatmaps

1. Introduction to Plotly¶

1. Until now we did visualisations using Matplotlib, Seaborn and Pandas. All of them produce static image files.

2. Plotly is a company based in Canada, famous for its products such as Plotly and Dash.

3. Plotly creates interactive visualisations in the form of HTML files

4. Drawback- can't work with a live data source

5. Dash is used to create live data based dashboards.

import numpy as np
import pandas as pd
#import plotly.offline as pyo   used for making a new html file
import plotly.express as px
import plotly.graph_objs as go
#pyo.init_notebook_mode(connected=True)

# Load the match-level table and the ball-by-ball table from local CSV files.
match = pd.read_csv('plotly_data/matches.csv')
delivery = pd.read_csv('plotly_data/deliveries.csv')

# Attach the match details to every delivery: deliveries.match_id joins matches.id.
ipl = pd.merge(delivery, match, left_on='match_id', right_on='id')
ipl.head()

1. Scatter Plots¶

# A scatter plot is drawn between two continuous variables.
# Problem: plot Batsman Avg (X axis) against Batsman Strike Rate (Y axis)
# for the all-time top 50 IPL batsmen, ranked by total runs.

# Names of the 50 batsmen with the highest run totals, then only their rows.
career_runs = ipl.groupby('batsman')['batsman_runs'].sum()
top50 = career_runs.sort_values(ascending=False).head(50).index.tolist()
new_ipl = ipl[ipl['batsman'].isin(top50)]

# Strike rate: SR = (runs scored / balls played) * 100.
# Balls played is taken here as the number of delivery rows per batsman.
per_batsman = new_ipl.groupby('batsman')['batsman_runs']
runs = per_batsman.sum()
balls = per_batsman.count()

# reset_index() turns the batsman index into a column; the SR values keep
# the column name `batsman_runs`.
sr = ((runs / balls) * 100).reset_index()
sr.head()

# Batting average: Avg = total runs / number of dismissals.

# Every delivery on which one of the top 50 batsmen was dismissed.
out = ipl[ipl['player_dismissed'].isin(top50)]

# Dismissal count per batsman, indexed by batsman name.
nouts = out['player_dismissed'].value_counts()

# Name the index and the values explicitly before reset_index(), instead of
# renaming the implicit 'index' / 0 labels afterwards: a rename on labels that
# are not present is silently ignored and would leave the columns mislabelled.
avg = (runs / nouts).rename_axis('batsman').rename('avg').reset_index()

# Bring in the strike rate; it arrives under the column name `batsman_runs`.
avg = avg.merge(sr, on='batsman')
avg.head()

# Scatter plot: batting average on X, strike rate on Y.
# In `avg`, the `batsman_runs` column holds the strike rate merged in from `sr`.
trace = go.Scatter(
    x=avg['avg'],
    y=avg['batsman_runs'],
    mode='markers',
)
data = [trace]

layout = go.Layout(
    title='Avg vs SR of Top 50 Batsman',
    xaxis=dict(title='Avg'),
    yaxis=dict(title='SR'),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

2. Line Chart¶

A line chart is an extension of the scatter plot, usually used to show time-series data.

# Season-by-season run totals for one batsman.
is_kohli = ipl['batsman'] == 'V Kohli'
single = ipl[is_kohli]
performance = single.groupby('season')['batsman_runs'].sum().reset_index()
performance

# Line chart: a Scatter trace whose mode draws both the connecting line and
# the individual season markers.
layout = go.Layout(
    title='Year by Year Performance of Virat Kohli',
    xaxis=dict(title='Season'),
    yaxis=dict(title='Runs'),
)
trace = go.Scatter(
    x=performance['season'],
    y=performance['batsman_runs'],
    mode='lines+markers',
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
fig.show()

# Multiple line charts on one figure


def batsman_comp(*name):
    """Show one line chart comparing season-wise run totals of several batsmen.

    Args:
        *name: Batsman names as they appear in the ``batsman`` column of the
            module-level ``ipl`` frame. One trace is drawn per name.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """

    def _season_trace(batsman):
        # Total runs per season for this batsman, as a two-column frame.
        season_runs = (
            ipl[ipl['batsman'] == batsman]
            .groupby('season')['batsman_runs']
            .sum()
            .reset_index()
        )
        return go.Scatter(
            x=season_runs['season'],
            y=season_runs['batsman_runs'],
            mode='lines + markers',
            name=batsman,
        )

    traces = [_season_trace(batsman) for batsman in name]

    layout = go.Layout(
        title='Batsman Record Comparator',
        xaxis=dict(title='Season'),
        yaxis=dict(title='Runs'),
    )

    go.Figure(data=traces, layout=layout).show()


batsman_comp('V Kohli', 'RG Sharma', 'DA Warner', 'MS Dhoni')

3. Bar Plot¶

Used to show the relationship between one categorical variable and one numerical variable.

# The ten highest run scorers, and every delivery row that belongs to them.
run_totals = ipl.groupby('batsman')['batsman_runs'].sum()
top10 = run_totals.sort_values(ascending=False).head(10).index.tolist()
top10_df = ipl[ipl['batsman'].isin(top10)]

# Total runs per batsman as a two-column frame (batsman, batsman_runs).
top10_score = top10_df.groupby('batsman')['batsman_runs'].sum().reset_index()
top10_score

# Bar graph: one bar per batsman, bar height = total runs.
layout = go.Layout(
    title='Top 10 Batsman in IPL',
    xaxis=dict(title='Batsman'),
    yaxis=dict(title='Runs'),
)
trace = go.Bar(
    x=top10_score['batsman'],
    y=top10_score['batsman_runs'],
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
fig.show()

There are 3 types of Bar Graphs¶

1. Nested Bar Graph
2. Stacked Bar Graph
3. Overlayed Bar Graph

# Runs per batsman per innings for the top 10 batsmen.
iw = top10_df.groupby(['batsman', 'inning'])['batsman_runs'].sum().reset_index()
mask = iw['inning'] == 1
mask2 = iw['inning'] == 2

# rename() without inplace=True returns a new frame, so nothing is written
# back into a slice of `iw` and pandas raises no SettingWithCopyWarning.
one = iw[mask].rename(columns={'batsman_runs': '1st Innings'})
two = iw[mask2].rename(columns={'batsman_runs': '2nd Innings'})

# One row per batsman with first- and second-innings totals side by side.
final = one.merge(two, on='batsman')[['batsman', '1st Innings', '2nd Innings']]

final

/tmp/ipykernel_13521/3723927907.py:8: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_13521/3723927907.py:9: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

# Nested (grouped) bar chart: two bars per batsman, one per innings.
# No barmode is set, so Plotly's default layout for multiple Bar traces applies.
trace1 = go.Bar(x=final['batsman'], y=final['1st Innings'], name='1st Innings')
trace2 = go.Bar(x=final['batsman'], y=final['2nd Innings'], name='2nd Innings')

data = [trace1, trace2]
layout = go.Layout(
    title='1st Innings vs 2nd Innings Runs of Top 10 Batsman',
    xaxis={'title': 'Batsman'},
    yaxis={'title': 'Runs'},
)
fig = go.Figure(data=data, layout=layout)
fig.show()

# Stacked bar chart: the same two traces as the grouped chart, drawn on top of
# each other so that each bar's total height is the batsman's combined runs.
trace1 = go.Bar(x=final['batsman'], y=final['1st Innings'], name='1st Innings')
trace2 = go.Bar(x=final['batsman'], y=final['2nd Innings'], name='2nd Innings')

data = [trace1, trace2]
layout = go.Layout(
    title='1st Innings vs 2nd Innings Runs of Top 10 Batsman',
    xaxis={'title': 'Batsman'},
    yaxis={'title': 'Runs'},
)
fig = go.Figure(data=data, layout=layout)

# Switch the bar mode after construction to stack the traces.
fig.update_layout(barmode='stack')

fig.show()

4. Bubble Plot¶

Again an extension of the scatter plot, with some additional information encoded in the marker size.

# Keep only the deliveries the top 50 batsmen hit for six, then count them
# per batsman.
new_ipl = new_ipl[new_ipl['batsman_runs'] == 6]

six = new_ipl.groupby('batsman')['batsman_runs'].count().reset_index()

# Both frames carry a `batsman_runs` column, so the merge suffixes them:
# batsman_runs_x is the strike rate (from `avg`), batsman_runs_y the six count.
x = avg.merge(six, on='batsman')

x

# Bubble chart: strike rate on X, number of sixes on Y, and the batting
# average encoded as the marker size.
trace = go.Scatter(
    x=x['batsman_runs_x'],
    y=x['batsman_runs_y'],
    mode='markers',
    marker=dict(size=x['avg']),
)

data = [trace]

# Title and axis labels describe the columns actually plotted above (the
# previous labels were left over from the Avg-vs-SR scatter plot).
layout = go.Layout(
    title='SR vs Sixes of Top 50 Batsman (bubble size = Avg)',
    xaxis={'title': 'SR'},
    yaxis={'title': 'Sixes'},
)

fig = go.Figure(data=data, layout=layout)

fig.show()

5. Box Plot¶

A box and whisker plot—also called a box plot—displays the five-number summary of a set of data.

# Total runs scored in each match (all innings combined).
match_agg = delivery.groupby(['match_id'])['total_runs'].sum().reset_index()

# Look up each match's season and keep only the three columns needed below.
with_match_info = pd.merge(match_agg, match, left_on='match_id', right_on='id')
season_wise = with_match_info[['match_id', 'total_runs', 'season']]
season_wise

# Box plot: one box per season summarising that season's per-match totals.
layout = go.Layout(
    title='Season Wise Runs Distribution',
    xaxis=dict(title='Season'),
    yaxis=dict(title='Runs'),
)
trace = go.Box(
    x=season_wise['season'],
    y=season_wise['total_runs'],
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
fig.show()

6. Histograms¶

A histogram is a plot that lets you discover, and show, the underlying frequency distribution (shape) of a set of continuous data.

# Batsmen with more than 150 delivery rows to their name.
rows_per_batsman = delivery.groupby('batsman')['batsman_runs'].count()
x = rows_per_batsman[rows_per_batsman > 150].index.tolist()

new = delivery[delivery['batsman'].isin(x)]

# Strike rate for each of those batsmen: (runs / balls) * 100.
by_batsman = new.groupby('batsman')['batsman_runs']
runs = by_batsman.sum()
balls = by_batsman.count()

# As a frame: column `batsman_runs` now holds the strike rate.
sr = ((runs / balls) * 100).reset_index()
sr

# Histogram of strike rates; nbinsx sets the maximum number of bins on X.
trace = go.Histogram(x=sr['batsman_runs'], nbinsx=25)

data = [trace]

layout = go.Layout(
    title='Distribution of Strike Rate',
    xaxis={'title': 'SR'},
    yaxis={'title': 'Count'},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

7. Distplots¶

# Distplot of two series from `avg`: the batting average and the
# `batsman_runs` column (labelled 'Strike Rate' below).

import plotly.figure_factory as ff

# Labels and series are matched by position; bin_size gives one bin width
# per series, in the same order.
group_labels = ['Average', 'Strike Rate']
hist_data = [avg['avg'], avg['batsman_runs']]

fig = ff.create_distplot(hist_data, group_labels, bin_size=[6, 15])
fig.show()

8. Heatmaps¶

A heat map is a graphical representation of data where the individual values contained in a matrix are represented as colors.

# Number of sixes hit by each batting team in each over.
hit_for_six = delivery['batsman_runs'] == 6
six = delivery[hit_for_six]
six = six.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

six

# Heatmap: team on X, over on Y, colour = number of sixes.
layout = go.Layout(title='Six Heatmap')
trace = go.Heatmap(
    x=six['batting_team'],
    y=six['over'],
    z=six['batsman_runs'],
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
fig.show()

# Side-by-side heatmaps: sixes (left) and zero-run deliveries (right),
# per batting team and over.

# Deliveries on which the batsman scored no runs, counted per team and over.
dots = delivery[delivery['batsman_runs'] == 0]
dots = dots.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

from plotly import subplots

trace1 = go.Heatmap(x=six['batting_team'], y=six['over'],
                    z=six['batsman_runs'].values.tolist())

trace2 = go.Heatmap(x=dots['batting_team'], y=dots['over'],
                    z=dots['batsman_runs'].values.tolist())


# One row, two columns, with a shared Y axis (the over number).
fig = subplots.make_subplots(rows=1, cols=2, subplot_titles=["6's", "0's"],
                             shared_yaxes=True)

# add_trace with row/col is the supported way to place a trace in a subplot
# cell; append_trace is deprecated and emits a DeprecationWarning.
fig.add_trace(trace1, row=1, col=1)
fig.add_trace(trace2, row=1, col=2)

fig.show()

dots

9. Mesh Plots¶

# Fixed seed so the random points are the same on every run.
np.random.seed(1)

N = 70

# Mesh3d trace over N random points; each axis is a standard-normal sample
# scaled by a different factor. The draws happen in x, y, z order.
trace = go.Mesh3d(
    x=70 * np.random.randn(N),
    y=55 * np.random.randn(N),
    z=40 * np.random.randn(N),
    color='rgba(144,220,100,0.4)',
)

data = [trace]

# The title names the trace type actually drawn (a mesh, not a scatter).
# All three scene axes are clamped to [-100, 100] with about four ticks each.
layout = go.Layout(
    title='3D Mesh Plot',
    scene=dict(
        xaxis=dict(nticks=4, range=[-100, 100]),
        yaxis=dict(nticks=4, range=[-100, 100]),
        zaxis=dict(nticks=4, range=[-100, 100]),
    ),
    width=1000,
    height=700,
    margin=dict(r=20, l=10, b=10, t=10),
)

fig = go.Figure(data=data, layout=layout)
fig.show()

	match_id	inning	batting_team	bowling_team	over	ball	batsman	non_striker	bowler	...	result	winner	win_by_runs	player_of_match	venue	umpire1	umpire2	umpire3
0	1	1	Sunrisers Hyderabad	Royal Challengers Bangalore	1	1	DA Warner	S Dhawan	TS Mills	...	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
1	1	1	Sunrisers Hyderabad	Royal Challengers Bangalore	1	2	DA Warner	S Dhawan	TS Mills	...	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
2	1	1	Sunrisers Hyderabad	Royal Challengers Bangalore	1	3	DA Warner	S Dhawan	TS Mills	...	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
3	1	1	Sunrisers Hyderabad	Royal Challengers Bangalore	1	4	DA Warner	S Dhawan	TS Mills	...	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN
4	1	1	Sunrisers Hyderabad	Royal Challengers Bangalore	1	5	DA Warner	S Dhawan	TS Mills	...	normal	Sunrisers Hyderabad	35	Yuvraj Singh	Rajiv Gandhi International Stadium, Uppal	AY Dandekar	NJ Llong	NaN

	batsman	batsman_runs
0	AB de Villiers	145.129059
1	AC Gilchrist	133.054662
2	AJ Finch	126.299213
3	AM Rahane	117.486549
4	AT Rayudu	123.014257

	batsman	avg	batsman_runs
0	AB de Villiers	38.307692	145.129059
1	AC Gilchrist	27.223684	133.054662
2	AJ Finch	27.186441	126.299213
3	AM Rahane	33.593407	117.486549
4	AT Rayudu	27.146067	123.014257

	season	batsman_runs
0	2008	165
1	2009	246
2	2010	307
3	2011	557
4	2012	364
5	2013	639
6	2014	359
7	2015	505
8	2016	973
9	2017	308

	batsman	batsman_runs
0	AB de Villiers	3486
1	CH Gayle	3651
2	DA Warner	4014
3	G Gambhir	4132
4	MS Dhoni	3560
5	RG Sharma	4207
6	RV Uthappa	3778
7	S Dhawan	3561
8	SK Raina	4548
9	V Kohli	4423

	batsman	1st Innings	2nd Innings
0	AB de Villiers	2128	1345
1	CH Gayle	2003	1623
2	DA Warner	2118	1896
3	G Gambhir	1699	2433
4	MS Dhoni	2232	1328
5	RG Sharma	2344	1863
6	RV Uthappa	1516	2262
7	S Dhawan	2262	1299
8	SK Raina	2647	1893
9	V Kohli	2391	2027

	batsman	batsman_runs
0	A Ashish Reddy	142.857143
1	A Mishra	89.005236
2	A Symonds	124.711908
3	AA Jhunjhunwala	99.541284
4	AB Agarkar	111.875000
...	...	...
157	Y Nagar	105.166052
158	Y Venugopal Rao	113.872832
159	YK Pathan	140.751445
160	YV Takawale	104.918033
161	Yuvraj Singh	126.390244

	batting_team	over	batsman_runs
0	Chennai Super Kings	1	5
1	Chennai Super Kings	2	17
2	Chennai Super Kings	3	37
3	Chennai Super Kings	4	34
4	Chennai Super Kings	5	41
...	...	...	...
271	Sunrisers Hyderabad	16	22
272	Sunrisers Hyderabad	17	18
273	Sunrisers Hyderabad	18	37
274	Sunrisers Hyderabad	19	42
275	Sunrisers Hyderabad	20	28

	batsman	avg	batsman_runs_x	batsman_runs_y
0	AB de Villiers	38.307692	145.129059	158
1	AC Gilchrist	27.223684	133.054662	92
2	AJ Finch	27.186441	126.299213	59
3	AM Rahane	33.593407	117.486549	60
4	AT Rayudu	27.146067	123.014257	79
5	BB McCullum	28.112245	126.318203	124
6	BJ Hodge	33.333333	121.422376	43
7	CH Gayle	41.022472	144.194313	266
8	DA Miller	34.733333	137.709251	78
9	DA Warner	40.140000	138.318401	160
10	DJ Bravo	22.945455	122.286822	48
11	DJ Hussey	26.979592	120.072661	60
12	DPMD Jayawardene	28.250000	118.791064	40
13	DR Smith	28.392857	132.279534	117
14	F du Plessis	30.116279	124.161074	38
15	G Gambhir	31.541985	120.361200	58
16	GJ Maxwell	25.081633	157.564103	82
17	JH Kallis	28.552941	105.936272	44
18	JP Duminy	39.860000	121.970624	78
19	KA Pollard	28.707317	140.621266	148
20	KC Sangakkara	25.953846	118.469101	27
21	KD Karthik	24.811966	123.008475	71
22	KK Nair	25.173913	124.650161	26
23	M Vijay	26.431579	120.028681	89
24	MEK Hussey	38.764706	119.963592	52
25	MK Pandey	28.500000	116.938453	65
26	MK Tiwary	29.428571	114.127424	39
27	MS Dhoni	37.872340	132.835821	156
28	NV Ojha	20.986486	114.528024	79
29	PA Patel	21.700935	112.718447	35
30	R Dravid	28.233766	113.347237	28
31	RA Jadeja	24.055556	118.792867	57
32	RG Sharma	32.612403	128.497251	173
33	RV Uthappa	29.515625	127.635135	125
34	S Badrinath	30.659574	115.742972	28
35	S Dhawan	32.669725	118.502496	71
36	SC Ganguly	25.452830	101.734540	42
37	SE Marsh	39.507937	130.109775	78
38	SK Raina	33.940299	134.995548	174
39	SPD Smith	37.239130	128.507127	45

	match_id	total_runs	season
0	1	379	2017
1	2	371	2017
2	3	367	2017
3	4	327	2017
4	5	299	2017
...	...	...	...
631	632	277	2016
632	633	317	2016
633	634	302	2016
634	635	325	2016
635	636	408	2016

Contents