# Seaborn: A Statistical Data Visualization Package
## Styles

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.integrate  # loaded explicitly: later cells access sp.integrate
import scipy.stats  # loaded explicitly: later cells access sp.stats

def sinplot(flip=1):
    """Draw six phase-shifted sine curves with ``plt.plot``.

    Curve ``i`` (for ``i = 1..6``) is ``sin(x + 0.5 * i) * (7 - i) * flip``
    evaluated at 100 evenly spaced points on ``[0, 14]``, so each successive
    curve is shifted in phase and smaller in amplitude.

    Parameters
    ----------
    flip : number, default 1
        Multiplier applied to every curve; ``-1`` mirrors the curves
        vertically.
    """
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        # Phase offset grows by 0.5 per curve; amplitude shrinks from 6 to 1.
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)
 
# Draw the demo curves once before seaborn is imported (plain-matplotlib baseline).
sinplot()

In [None]:
import seaborn as sb
# Call seaborn's set() with no arguments, i.e. with all of its defaults.
sb.set()

In [None]:
# Same curves as the first cell, now drawn after sb.set().
sinplot()

In [None]:
# Select the "whitegrid" style and redraw the demo curves.
sb.set_style("whitegrid")
sinplot()

In [None]:
# Select the "dark" style and redraw the demo curves.
sb.set_style("dark")
sinplot()

In [None]:
# Select the "white" style and redraw the demo curves.
sb.set_style("white")
sinplot()

In [None]:
# Select the "ticks" style and redraw the demo curves.
sb.set_style("ticks")
sinplot()

In [None]:
sinplot()
# Post-process the spines of the plot just drawn, with offset=10 and trim=True.
sb.despine(offset=10, trim=True)

In [None]:
# Back to the "whitegrid" style, then despine the plot.
sb.set_style("whitegrid")
sinplot()
# NOTE(review): left=False appears to match despine's default, so this likely
# behaves like a bare sb.despine() -- confirm whether left=True was intended.
sb.despine(left=False)

In [None]:
# The first subplot is created (and drawn) inside the axes_style("darkgrid")
# context; the second is created after the context has exited, for comparison.
with sb.axes_style("darkgrid"):
 plt.subplot(211)
 sinplot()
plt.subplot(212)
sinplot(-1)

In [None]:
# Display the value returned by axes_style("dark") as the cell output.
sb.axes_style("dark")

In [None]:
# Select the "dark" style while overriding "axes.facecolor" with ".1".
sb.set_style("dark", {"axes.facecolor": ".1"})
sinplot()

In [None]:
# Call sb.set() again with no arguments (its defaults) and redraw.
sb.set()
sinplot()

In [None]:
# Plotting context: "paper".
sb.set_context("paper")
sinplot()

In [None]:
# Plotting context: "notebook".
sb.set_context("notebook")
sinplot()

In [None]:
# Plotting context: "talk".
sb.set_context("talk")
sinplot()

In [None]:
# Plotting context: "poster".
sb.set_context("poster")
sinplot()

In [None]:
# One call sets both the "paper" context and a (10, 6) "figure.figsize" rc override.
sb.set(context='paper', rc={"figure.figsize": (10, 6)})
sinplot()

# Color Palettes

In [None]:
# Fetch the "deep" palette and draw its swatches with palplot.
current_palette = sb.color_palette("deep")
sb.palplot(current_palette)

In [None]:
# Swatches for the "pastel" palette.
sb.palplot(sb.color_palette("pastel"))

In [None]:
# Swatches for the "bright" palette.
sb.palplot(sb.color_palette("bright"))

In [None]:
# Swatches for the "dark" palette.
sb.palplot(sb.color_palette("dark"))

In [None]:
# Swatches for the "colorblind" palette.
sb.palplot(sb.color_palette("colorblind"))

In [None]:
# Swatches for the "Blues" palette.
sb.palplot(sb.color_palette("Blues"))

In [None]:
# Swatches for the "BuGn_r" palette.
sb.palplot(sb.color_palette("BuGn_r"))

In [None]:
# Swatches for the "Paired" palette.
sb.palplot(sb.color_palette("Paired"))

In [None]:
# "Set2" with 10 colors requested and desat=.8.
sb.palplot(sb.color_palette("Set2", n_colors=10, desat=.8))

In [None]:
# Eight colors from the "husl" palette.
sb.palplot(sb.color_palette("husl", n_colors=8))

In [None]:
# Eight colors from the "hls" palette.
sb.palplot(sb.color_palette("hls", 8))

In [None]:
# hls_palette called directly: 8 colors with l=.3 and s=.8.
sb.palplot(sb.hls_palette(8, l=.3, s=.8))

In [None]:
# husl_palette called directly with the same arguments (8 colors, l=.3, s=.8).
sb.palplot(sb.husl_palette(8, l=.3, s=.8))

In [None]:
# light_palette built from the color "purple".
sb.palplot(sb.light_palette("purple"))

In [None]:
# dark_palette built from the color "purple".
sb.palplot(sb.dark_palette("purple"))

In [None]:
# Eight-color blend between "firebrick" and "palegreen".
sb.palplot(sb.blend_palette(["firebrick", "palegreen"], 8)) 

In [None]:
# Seven-color diverging palette built from the arguments 220 and 20.
sb.palplot(sb.diverging_palette(220, 20, n=7))

In [None]:
# Show the documentation for diverging_palette. help() is used instead of the
# IPython-only `?` suffix so the cell is also valid plain Python.
help(sb.diverging_palette)

In [None]:
# Diverging palette with l=60, seven colors and center="dark".
sb.palplot(sb.diverging_palette(255, 133, l=60, n=7, center="dark"))

In [None]:
# Custom palette built from an explicit list of six hex color codes.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sb.palplot(sb.color_palette(flatui))

In [None]:
# Palette built from six color names passed to xkcd_palette.
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple", "wisteria"]
sb.palplot(sb.xkcd_palette(colors))

In [None]:
# Display seaborn's xkcd_rgb object as the cell output.
sb.xkcd_rgb

In [None]:
# Eight-color cubehelix palette with explicit start, rot, dark and light values, reversed.
sb.palplot(sb.cubehelix_palette(8, start=1.5, rot=1.75, dark=0.5, light=.95, reverse=True))

In [None]:
# Use the xkcd palette (built from `colors`) as a context manager around the plot call.
with sb.xkcd_palette(colors):
 sinplot()

In [None]:
# Redraw outside any palette context, for comparison with the previous cell.
sinplot()

In [None]:
# Pass an eight-color "Dark2" palette to set_palette, then redraw.
sb.set_palette(sb.color_palette("Dark2", 8))
sinplot()

# Density Plots

In [None]:
# Seed NumPy's global RNG so that this sample -- and the samples that later
# cells draw from the same global generator -- are reproducible when the
# notebook is re-run from a fresh kernel.
np.random.seed(0)

# 200 standard-normal draws, shown with distplot's default settings.
x = np.random.normal(size=200)
sb.distplot(x)

In [None]:
# Same sample with bins=20.
sb.distplot(x, bins=20)

In [None]:
# Histogram (bins=20) with the KDE switched off and the rug switched on.
sb.distplot(x, bins=20, kde=False, rug=True)

In [None]:
# 2x2 grid of axes sharing the x axis; each panel gets a different distplot variant.
f, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True)
sb.despine(left=True)

# Row 0, column 0: KDE switched off.
sb.distplot(x, kde=False, color="b", ax=axes[0, 0])

# Row 0, column 1: histogram switched off, rug switched on.
sb.distplot(x, hist=False, rug=True, color="r", ax=axes[0, 1])

# Row 1, column 0: histogram switched off, KDE drawn with shade=True.
sb.distplot(x, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1, 0])

# Row 1, column 1: defaults apart from the color.
sb.distplot(x, color="y", ax=axes[1, 1])

# NOTE(review): the two calls below are disabled -- confirm whether they
# should be restored or deleted.
#plt.setp(axes, yticks=[])
#plt.tight_layout()

In [None]:
# Draw 15 random observations and, for each one, plot the pdf of
# sp.stats.norm(x_i, bandwidth) evaluated on the 100-point grid `support`.
x = np.random.normal(size=15)
support = np.linspace(-4, 4, 100)
bandwidth = 0.7
for x_i in x:
 plt.plot(support, sp.stats.norm(x_i, bandwidth).pdf(support))

In [None]:
# Same per-observation curves as the previous cell, plus a rug plot of the observations.
for x_i in x:
 plt.plot(support, sp.stats.norm(x_i, bandwidth).pdf(support))
sb.rugplot(x, color=".2", linewidth=3)

In [None]:
# One pdf curve per observation: sp.stats.norm(x_i, bandwidth) evaluated on `support`.
kernels = [sp.stats.norm(x_i, bandwidth).pdf(support) for x_i in x]

In [None]:
# Sum the collected kernel curves along axis 0 and plot the result over `support`.
density = np.sum(kernels, axis=0)
plt.plot(support, density)

In [None]:
# Normalize the summed curve by its trapezoidal-rule integral over `support`.
# NumPy's trapezoid routine is used instead of scipy.integrate.trapz, which
# has been removed from recent SciPy releases. np.trapezoid is the NumPy 2
# name; np.trapz is the spelling available in older NumPy versions.
_trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
density /= _trapezoid(density, support)
plt.plot(support, density)

In [None]:
# seaborn's kdeplot on the same sample, with shade=True.
sb.kdeplot(x, shade=True)

In [None]:
# Compare four bw settings on the same sample: two named rules and two numeric values.
sb.kdeplot(x, bw='scott', label="bw: scott")
sb.kdeplot(x, bw='silverman', label="bw: silverman")
sb.kdeplot(x, bw=.2, label="bw: 0.2")
sb.kdeplot(x, bw=2, label="bw: 2")
sb.rugplot(x)
# Fix the y range to [0, 0.8] and show the legend built from the labels above.
plt.ylim(0, 0.8)
plt.legend()

In [None]:
# Fresh sample of 30 draws; shaded KDE with cut=0, plus a rug of the observations.
x = np.random.normal(size=30)
sb.kdeplot(x, shade=True, cut=0)
sb.rugplot(x)

In [None]:
# 200 draws from np.random.gamma with shape argument 6.
x = np.random.gamma(6, size=200)

# Fit four candidate distributions to the same sample, one per panel of a
# 2x2 grid. plt.subplot(2, 2, k) addresses the same panel as the three-digit
# form plt.subplot(22k).
candidate_fits = [sp.stats.norm, sp.stats.laplace, sp.stats.expon, sp.stats.gamma]
for panel, dist in enumerate(candidate_fits, start=1):
    plt.subplot(2, 2, panel)
    sb.distplot(x, kde=False, hist=True, fit=dist)

## Bivariate Plots

In [None]:
import pandas as pd
# Call sb.set() with its defaults again before the bivariate examples.
sb.set()

In [None]:
# 200 draws from a two-dimensional normal distribution with means (0, 1),
# unit variances and covariance 0.95 between the two components.
mean = [0, 1]
cov = [(1, .95), (.95, 1)]
data = np.random.multivariate_normal(mean, cov, 200)

# Split the (200, 2) sample into its two coordinate columns.
x = data[:, 0]
y = data[:, 1]

In [None]:
# Univariate distplots of x and of y, issued back to back in the same cell.
sb.distplot(x)
sb.distplot(y)

In [None]:
# Joint plot of x against y with the default kind.
sb.jointplot(x, y, size=7)

In [None]:
# Same joint plot with kind='hex'.
sb.jointplot(x, y, size=7, kind='hex')

In [None]:
# Same joint plot with kind='kde', then save to 'kde2d.pdf' (a relative path).
# NOTE(review): plt.savefig is assumed to act on the figure jointplot just
# created, i.e. that it is still pyplot's current figure -- confirm.
sb.jointplot(x, y, size=7, kind='kde')
plt.savefig('kde2d.pdf')

In [None]:
# Bivariate KDE on an explicit 7x7 axes, with a rug for x and a vertical rug for y.
f, ax = plt.subplots(figsize=(7, 7))
sb.kdeplot(x, y, ax=ax)
sb.rugplot(x, color="g", ax=ax)
sb.rugplot(y, vertical=True, ax=ax)

In [None]:
# Shaded bivariate KDE with 60 levels, colored by a reversed cubehelix colormap.
f, ax = plt.subplots(figsize=(7, 7))
cmap = sb.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
sb.kdeplot(x, y, ax=ax, cmap=cmap, n_levels=60, shade=True)

In [None]:
# Start from a KDE joint plot, then add a scatter of white "+" markers via plot_joint.
g = sb.jointplot(x, y, kind="kde")
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")
# Make the first collection on the joint axes fully transparent.
# NOTE(review): which artist collections[0] is depends on how seaborn draws
# the KDE -- confirm against the installed version.
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$")

In [None]:
# Local, seeded generator so the nine panels are reproducible.
rs = np.random.RandomState(50)

# Set up the matplotlib figure
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)

# Rotate the starting point around the cubehelix hue circle
# (np.linspace yields 10 start values but there are only 9 axes; zip stops at
# the shorter sequence, so the last start value is never used.)
for ax, s in zip(axes.flat, np.linspace(0, 3, 10)):
 # Create a cubehelix colormap to use with kdeplot
 cmap = sb.cubehelix_palette(start=s, light=1, as_cmap=True)
 # Generate and plot a random bivariate dataset
 # (this rebinds the notebook-level names x and y)
 x, y = rs.randn(2, 50)
 sb.kdeplot(x, y, cmap=cmap, shade=True, cut=5, ax=ax)
 ax.set(xlim=(-3, 3), ylim=(-3, 3))

f.tight_layout()

# Working with DataFrames

In [None]:
# Load seaborn's "tips" example dataset and display it.
tips = sb.load_dataset("tips")
tips

In [None]:
# Count plot given the 'day' and 'sex' columns themselves as x and hue.
sb.countplot(x=tips['day'], hue=tips['sex'])

In [None]:
# Same plot, naming the columns and passing the frame through data=.
sb.countplot(x="day", hue="sex", data=tips)

In [None]:
# "day" assigned to y instead of x, with the 'hls' palette.
sb.countplot(y="day", hue="sex", data=tips, palette='hls')

In [None]:
# Bar plot of total_bill by day, split by sex.
sb.barplot(x="day", y="total_bill", hue="sex", data=tips)

In [None]:
# Point plot of the same variables.
sb.pointplot(x="day", y="total_bill", hue="sex", data=tips)

In [None]:
# Point plot with an explicit color per hue level plus custom markers and line styles.
sb.pointplot(x="day", y="total_bill", hue="sex", data=tips, 
 palette={"Male": "#943684", "Female": "#528257"}, 
 markers=["^", "o"], linestyles=["-", "--"])

In [None]:
# Strip plot of total_bill by day with default settings.
sb.stripplot(x="day", y="total_bill", data=tips)

In [None]:
# Same strip plot with jitter=True.
sb.stripplot(x="day", y="total_bill", data=tips, jitter=True)

In [None]:
# Swarm plot of tip by time.
sb.swarmplot(x="time", y="tip", data=tips)

In [None]:
# Swarm plot of total_bill by the "size" column.
sb.swarmplot(x="size", y="total_bill", data=tips)

In [None]:
# Swarm plot of total_bill by day, colored by sex.
sb.swarmplot(x="day", y="total_bill", hue="sex", data=tips)

In [None]:
# Axes swapped (total_bill on x, day on y), colored by time.
sb.swarmplot(x="total_bill", y="day", hue="time", data=tips)

In [None]:
# Box plot of total_bill by day, split by time.
sb.boxplot(x="day", y="total_bill", hue="time", data=tips)

In [None]:
# Display the tips frame again for reference.
tips

In [None]:
# Add a boolean "weekend" column: True where day is "Sat" or "Sun".
tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
tips

In [None]:
# Box plot of total_bill by day, colored by the new "weekend" column.
sb.boxplot(x="day", y="total_bill", hue="weekend", data=tips)

In [None]:
# Box plot followed by a swarm plot of the same variables (size=5, color ".3"
# and linewidth=0), then despine with trim=True and left=True.
sb.boxplot(x="day", y="total_bill", data=tips)
sb.swarmplot(x="day", y="total_bill", data=tips, size=5, color=".3", linewidth=0)
sb.despine(trim=True, left=True)

In [None]:
# Violin plot with total_bill on x and day on y, split by sex.
sb.violinplot(x="total_bill", y="day", hue="sex", data=tips)

In [None]:
# Violin plot with bw=.2 and scale="count".
sb.violinplot(x="day", y="total_bill", hue="sex", data=tips, bw=.2, scale="count")

In [None]:
# Smaller bw (.1), scale="count", and split=True for the two hue levels.
sb.violinplot(x="day", y="total_bill", hue="sex", data=tips, bw=.1, scale="count", 
 split=True)

In [None]:
# Split violins with inner="stick".
sb.violinplot(x="day", y="total_bill", hue="sex", data=tips, split=True, inner="stick")

In [None]:
# Split violins with inner="quart", then despine with left=True.
sb.violinplot(x="day", y="total_bill", hue="sex", data=tips, split=True, inner="quart")
sb.despine(left=True)

In [None]:
# Violins with inner=None, followed by a half-transparent white swarm plot of the same variables.
sb.violinplot(x="day", y="total_bill", data=tips, inner=None)
sb.swarmplot(x="day", y="total_bill", data=tips, color="w", alpha=.5)

In [None]:
# factorplot with its default kind.
sb.factorplot(x="day", y="total_bill", hue="sex", data=tips, size=6)

In [None]:
# Same call with kind="bar".
sb.factorplot(x="day", y="total_bill", hue="sex", data=tips, size=6, kind="bar")

In [None]:
# Display the tips frame again for reference.
tips

In [None]:
# kind="count" by day, split by the "size" column, with the "BuPu" palette and aspect=1.5.
sb.factorplot(x="day", hue="size", data=tips, size=6, kind="count", palette="BuPu", aspect=1.5)

In [None]:
# kind="swarm", faceted into columns by time and colored by smoker.
sb.factorplot(x="day", y="total_bill", hue="smoker", 
 col="time", data=tips, size=6, kind="swarm")

In [None]:
# kind="box", faceted into columns by day, with aspect=.5.
sb.factorplot(x="time", y="total_bill", hue="smoker", col="day", data=tips, 
 kind="box", size=6, aspect=.5)

In [None]:
# Display the tips frame again for reference.
tips

In [None]:
# Grid pairing each x_vars column with each y_vars column; swarmplot is mapped onto every cell.
g = sb.PairGrid(tips, x_vars=["smoker", "time", "sex"], 
 y_vars=["total_bill", "tip", "size"], aspect=.75, size=6)
g.map(sb.swarmplot, palette="pastel")

## Bivariate Plots

In [None]:
# Load seaborn's 'iris' example dataset and display it.
iris = sb.load_dataset('iris')
iris

In [None]:
# Joint plot of sepal_width against sepal_length, columns taken from the iris frame.
sb.jointplot(x="sepal_length", y="sepal_width", data=iris, size=7)

In [None]:
# Pair plot of the iris frame with default settings.
sb.pairplot(iris)

In [None]:
# Same pair plot, colored by species.
sb.pairplot(iris, hue="species")

In [None]:
# Pair grid restricted to the rows where species is 'versicolor'.
g = sb.PairGrid(iris[iris.species == 'versicolor'])
# Diagonal: distplot with a cumulative KDE; lower triangle: KDE with 6 levels;
# upper triangle: scatter.
g.map_diag(sb.distplot, norm_hist=True, bins='auto', kde_kws={'cumulative': True})
g.map_lower(sb.kdeplot, cmap="Blues_d", n_levels=6)
g.map_upper(plt.scatter)

In [None]:
# Subset the iris dataset by species
setosa = iris.query("species == 'setosa'")
virginica = iris.query("species == 'virginica'")

# Set up the figure
f, ax = plt.subplots(figsize=(8, 8))
ax.set_aspect("equal")

# Draw the two density plots
# NOTE(review): neither call passes ax=, so both presumably draw on the
# current axes created above; `ax` is rebound to kdeplot's return value.
ax = sb.kdeplot(setosa.sepal_width, setosa.sepal_length,
 cmap="Reds", shade=True, shade_lowest=False)
ax = sb.kdeplot(virginica.sepal_width, virginica.sepal_length,
 cmap="Blues", shade=True, shade_lowest=False)

# Add labels to the plot
# (label colors are the second-to-last entries of the matching palettes;
# the text positions are hard-coded coordinates)
red = sb.color_palette("Reds")[-2]
blue = sb.color_palette("Blues")[-2]
ax.text(2.5, 8.2, "virginica", size=16, color=blue)
ax.text(3.8, 4.5, "setosa", size=16, color=red)

## Linear Regression

In [None]:
# Load seaborn's "anscombe" example dataset and display it.
anscombe = sb.load_dataset("anscombe")
anscombe

In [None]:
# regplot on the rows where dataset == 'I' only.
sb.regplot(x="x", y="y", data=anscombe.query("dataset == 'I'"))

In [None]:
# lmplot of sepal_length against petal_length on the full iris frame.
sb.lmplot(x="petal_length", y="sepal_length", data=iris, size=6)

In [None]:
# Same plot with x_estimator=np.mean.
sb.lmplot(x="petal_length", y="sepal_length", data=iris, size=6, x_estimator=np.mean)

In [None]:
# lmplot with default settings on the rows where dataset == 'II'.
sb.lmplot(x="x", y="y", data=anscombe.query("dataset == 'II'"), size=6)

In [None]:
# Same data with order=2.
sb.lmplot(x="x", y="y", data=anscombe.query("dataset == 'II'"), size=6, order=2)

In [None]:
# order=2 applied to the iris petal_length / sepal_length data.
sb.lmplot(x="petal_length", y="sepal_length", data=iris, size=6, order=2)

In [None]:
# Rows where dataset == 'III', with x_estimator=np.mean.
sb.lmplot(x="x", y="y", data=anscombe.query("dataset == 'III'"), size=6, 
 x_estimator=np.mean)

In [None]:
# Same data with robust=True.
sb.lmplot(x="x", y="y", data=anscombe.query("dataset == 'III'"), size=6, robust=True)

In [None]:
# One facet per value of "dataset" (col_wrap=2), colored by dataset, with ci=None.
sb.lmplot(x="x", y="y", col="dataset", hue="dataset", data=anscombe, 
 col_wrap=2, ci=None, palette="muted", size=4, scatter_kws={"s": 50, "alpha": 1})

In [None]:
# Plot sepal width as a function of sepal length, with a separate fit per species
g = sb.lmplot(x="sepal_length", y="sepal_width", hue="species",
              truncate=True, size=8, data=iris)
# The iris measurements are recorded in centimetres, so label the axes in cm.
g.set_axis_labels("Sepal length (cm)", "Sepal width (cm)")

In [None]:
# Add a boolean "big_tip" column: True where tip / total_bill exceeds .15.
tips["big_tip"] = (tips.tip / tips.total_bill) > .15
sb.lmplot(x="total_bill", y="big_tip", data=tips, y_jitter=.03)

In [None]:
# Same plot with logistic=True.
sb.lmplot(x="total_bill", y="big_tip", data=tips, y_jitter=.03, logistic=True)

In [None]:
# logistic=True with a separate fit per value of "time".
sb.lmplot(x="total_bill", y="big_tip", data=tips, y_jitter=.03, logistic=True, hue="time")

In [None]:
# tip against total_bill with lowess=True, split by smoker.
sb.lmplot(x="total_bill", y="tip", data=tips, y_jitter=.03, lowess=True, hue="smoker")

In [None]:
# Faceted into columns by time, colored by smoker.
sb.lmplot(x="total_bill", y="tip", hue="smoker", col="time", data=tips)

In [None]:
# Faceted into columns by time and rows by sex, colored by smoker.
sb.lmplot(x="total_bill", y="tip", hue="smoker", col="time", row="sex", data=tips)

## Miscellaneous

In [None]:
# Initialize the figure
f, ax = plt.subplots(figsize=(15, 5))
sb.despine(bottom=True, left=True)

# Reshape iris to long form: "species" is kept as the id column, the remaining
# column names go into "measurement" and their values into "value".
iris2 = pd.melt(iris, "species", var_name="measurement")
 
# Show each observation with a scatterplot
sb.stripplot(x="value", y="measurement", hue="species",
 data=iris2, jitter=True, alpha=.5)

# Show the conditional means
sb.pointplot(x="value", y="measurement", hue="species", data=iris2,
 dodge=.5, join=False, palette="dark", markers="d", scale=.9, ci=None)

# Improve the legend
# NOTE(review): the [3:] slices drop the first three legend entries --
# presumably the stripplot's per-species handles, leaving the pointplot's;
# confirm against the rendered legend.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[3:], labels[3:], title="species", handletextpad=0, columnspacing=1,
 loc="lower right", ncol=3, frameon=True)

In [None]:
# Load seaborn's "flights" example dataset and display it.
flights_long = sb.load_dataset("flights")
flights_long

In [None]:
# Reshape to wide form: one row per month, one column per year, passenger
# counts as the cell values. The arguments are passed by keyword because
# pandas 2.x no longer accepts them positionally in DataFrame.pivot.
flights = flights_long.pivot(index="month", columns="year", values="passengers")
flights

In [None]:
# Heatmap of the pivoted table with each cell annotated (fmt="d") and .5-wide separator lines.
f, ax = plt.subplots(figsize=(9, 6))
sb.heatmap(flights, annot=True, fmt="d", linewidths=.5, ax=ax)

In [None]:
from string import ascii_letters

sb.set(style="white")

# Generate a large random dataset: 100 rows by 26 columns of normal draws,
# with the columns named by ascii_letters[26:]
rs = np.random.RandomState(33)
d = pd.DataFrame(data=rs.normal(size=(100, 26)), columns=list(ascii_letters[26:]))

# Compute the correlation matrix
corr = d.corr()

# Generate a mask for the upper triangle (diagonal included).
# The builtin `bool` is used as the dtype: the `np.bool` alias was removed in
# NumPy 1.24, whereas the builtin works on every NumPy version.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sb.diverging_palette(220, 10, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sb.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0, square=True,
           linewidths=.5, cbar_kws={"shrink": .5})

In [None]:
# Load the "brain_networks" example dataset.
# NOTE(review): header=[0, 1, 2] and index_col=0 are presumably forwarded to
# the underlying CSV reader (three header rows, first column as index) -- confirm.
brain_networks = sb.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
# Boolean mask over the columns: True where the "network" level, cast to int, is in used_networks.
used_columns = (brain_networks.columns.get_level_values("network").astype(int).isin(used_networks))
# Keep only the selected columns and display the result.
brain_networks = brain_networks.loc[:, used_columns]
brain_networks

In [None]:
# Eight-color husl palette, keyed by the used network numbers converted to strings.
network_pal = sb.husl_palette(8, s=.45)
network_lut = dict(zip(map(str, used_networks), network_pal))
# Look up a color for each column from its "network" level value.
networks = brain_networks.columns.get_level_values("network")
network_colors = pd.Series(networks, index=brain_networks.columns).map(network_lut)

# Clustermap of the column correlation matrix, with the network colors passed
# as both row_colors and col_colors.
sb.clustermap(brain_networks.corr(), center=0, cmap="BrBG", 
 row_colors=network_colors, col_colors=network_colors, linewidths=.75, figsize=(13, 13))

In [None]:
# Compute the correlation matrix and average over networks
corr_df = brain_networks.corr().groupby(level="network").mean()
# Cast the index to int before sorting it, then transpose so that each
# network becomes a column.
corr_df.index = corr_df.index.astype(int)
corr_df = corr_df.sort_index().T

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 6))

# Draw a violinplot with a narrower bandwidth than the default
sb.violinplot(data=corr_df, palette="Set3", bw=.2, cut=1, linewidth=1)

# Finalize the figure
ax.set(ylim=(-.7, 1.05))
sb.despine(left=True, bottom=True)

In [None]:
# Generate an example radial dataset: 300 radii on [0, 10], with three
# columns equal to r, 2 * r and 4 * r
r = np.linspace(0, 10, num=300)
df = pd.DataFrame({'r': r, 'slow': r, 'medium': 2 * r, 'fast': 4 * r})

# Convert the dataframe to long-form or "tidy" format
# (column names go into 'speed', their values into 'theta'; 'r' is kept as the id)
df = pd.melt(df, id_vars=['r'], var_name='speed', value_name='theta')

# Set up a grid of axes with a polar projection
g = sb.FacetGrid(df, col="speed", hue="speed", subplot_kws=dict(projection='polar'), size=4.5, sharex=False, sharey=False, despine=False)

# Draw a scatterplot onto each axes in the grid
g.map(plt.scatter, "theta", "r")

In [None]:
# Load seaborn's "gammas" example dataset and display it.
gammas = sb.load_dataset("gammas")
gammas

In [None]:
# tsplot of the "BOLD signal" column over "timepoint", with "subject" as the unit and "ROI" as the condition.
sb.tsplot(data=gammas, time="timepoint", unit="subject", condition="ROI", value="BOLD signal")

In [None]:
# Shared sample grid: 30 points on [0, 15]. It is identical for every
# replicate, so it is built once instead of on every loop iteration.
x = np.linspace(0, 30 / 2, 30)

# 15 noisy replicates of sin(x). Each replicate gets one scalar offset drawn
# with scale 1.5, plus 30 per-sample noise values drawn with scale .3. The
# two draws stay in their original order so the seeded sequence is unchanged.
sines = []
rs = np.random.RandomState(8)
for _ in range(15):
    y = np.sin(x) + rs.normal(0, 1.5) + rs.normal(0, .3, 30)
    sines.append(y)

# Plot the average over replicates with bootstrap resamples
sb.tsplot(sines, err_style="boot_traces", n_boot=500)