Seaborn Cheat Sheet

A complete guide to statistical data visualization with Seaborn, a Python library that provides a high-level interface for creating attractive statistical graphics.


Core Seaborn Functions

| Action | Code Example | Description |
| --- | --- | --- |
| Import and Set Style | `import seaborn as sns`; `sns.set_theme(style="darkgrid")` | Imports the library and sets a default aesthetic style for all subsequent plots, including those made with pandas or Matplotlib. |
| Histogram with KDE | `sns.histplot(data, bins=50, kde=True)` | Plots a histogram of the data distribution. The `kde=True` argument overlays a kernel-density estimate. |
| 2D Kernel-Density Plot | `sns.kdeplot(x=data1, y=data2)` | Creates a contour plot showing the joint kernel-density estimate for two variables. |
| Joint Distribution Plot | `sns.jointplot(x=data1, y=data2, kind="hex")` | Visualizes the joint distribution of two variables along with their individual marginal distributions. `kind="hex"` creates a hexbin plot. |
| Box Plot | `sns.boxplot(my_dataframe, palette="pastel")` | Creates a standard boxplot showing descriptive statistics (median, quartiles, outliers) for one or more variables. |
| Violin Plot | `sns.violinplot(my_dataframe, palette="pastel")` | Combines a boxplot with a kernel-density estimate on each side, showing the distribution of the data. |
| Categorical Violin Plot | `sns.violinplot(x=df.index.month, y=df.outdoor)` | Groups the data by a categorical variable (`x`) and creates a violin plot for the numerical variable (`y`) for each category. |
| Heatmap | `sns.heatmap(pivot_table, ax=ax)` | Visualizes a 2D matrix of data (like a pivot table) as a color-encoded grid. |

Advanced Plot Types

Statistical Plots

# Scatter plots with linear regression
sns.regplot(x="column1", y="column2", data=df,
            line_kws={"color": "red"}, scatter_kws={"alpha": 0.5})

# Line plots: repeated observations at each x are aggregated and drawn with a confidence interval
sns.lineplot(data=df, x="time", y="value",
             hue="category", style="method")

# Point plots with confidence intervals
sns.pointplot(data=df, x="parameter", y="score",
              hue="dataset", dodge=True)

Categorical Plots

# Bar plots for categorical data
sns.barplot(data=df, x="category", y="value", hue="group")

# Strip plots (scatter plot for categorical data)
sns.stripplot(data=df, x="category", y="value", jitter=True)

# Swarm plots (non-overlapping points)
sns.swarmplot(data=df, x="category", y="value", hue="group")

# Count plots (histogram for categorical data)
sns.countplot(data=df, x="category", hue="group")

Pairwise Relationships

# Pair plot for multiple variable relationships
sns.pairplot(df, hue="species", vars=["sepal_length", "sepal_width"])

# Pair grid with custom plots
g = sns.PairGrid(df, hue="species")
g.map_upper(sns.scatterplot)
g.map_lower(sns.kdeplot)
g.map_diag(sns.histplot)
g.add_legend()

Styling and Customization

Color Palettes

# Built-in color palettes
sns.color_palette("hls", 8)
sns.color_palette("viridis", as_cmap=True)

# Custom palette from hex colors
colors = ["#FC8EAC", "#F76C6C", "#A8D8B9"]
sns.set_palette(sns.color_palette(colors))

# Diverging palettes
sns.diverging_palette(220, 20, n=10)

Figure Styles

# Set overall style
sns.set_style("whitegrid")
sns.set_style("darkgrid")
sns.set_style("ticks")

# Context for different presentations
sns.set_context("paper")    # Small plots for publication
sns.set_context("notebook") # Standard notebook plots
sns.set_context("talk")     # Larger plots for presentations

Themes and Font Settings

# Custom theme dictionary
custom_theme = {
    "font.style": "normal",
    "text.color": "green",
    "axes.labelcolor": "green",
    "axes.edgecolor": "green"
}
sns.set_theme(style="white", rc=custom_theme)

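# Font scale (pass it together with style/rc in one set_theme call; a separate call resets the style to its default)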
sns.set_theme(font_scale=1.5)

Working with pandas DataFrames

Integration with pandas

# Load and explore data
import pandas as pd
tips = sns.load_dataset("tips")

# Get first few rows
print(tips.head())

# Check data types
print(tips.dtypes)

# Summary statistics
print(tips.describe())

Exploratory Analysis Flow

# 1. Overview of variables
sns.histplot(data=tips, x="total_bill", kde=True)

# 2. Bivariate relationships
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="smoker")

# 3. Categorical vs Numerical
sns.boxplot(data=tips, x="day", y="total_bill")
sns.violinplot(data=tips, x="day", y="total_bill")

# 4. Correlations
numerical_cols = ["total_bill", "tip", "size"]
corr = tips[numerical_cols].corr()
sns.heatmap(corr, annot=True, cmap="coolwarm")

# 5. Pairwise relationships
numerical_cols = tips.select_dtypes(include='number').columns
sns.pairplot(tips[numerical_cols])

Specialized Plots

Time Series Visualization

# Line plots with confidence intervals
sns.lineplot(data=stocks, x="date", y="price", hue="company")

# Seasonal decomposition plot
from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(data, model='additive', period=365)
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 10))
result.observed.plot(ax=ax1)
result.trend.plot(ax=ax2)
result.seasonal.plot(ax=ax3)
result.resid.plot(ax=ax4)

Statistical Plotting

# Residual plots
from scipy import stats
fig, ax = plt.subplots(figsize=(8, 6))
sns.residplot(x="x", y="y", data=dataframe, ax=ax)
ax.set_title("Residuals Plot")

# QQ plots
stats.probplot(data, dist="norm", plot=plt)
plt.title("Q-Q Plot")

# Correlation matrix
correlation_matrix = dataframe.corr()
sns.heatmap(correlation_matrix, annot=True, cmap="RdYlGn")

Faceted Plots

# FacetGrid for subset plotting
g = sns.FacetGrid(tips, col="time", row="smoker")
g.map(sns.scatterplot, "total_bill", "tip")

# JointGrid with marginal plots
g = sns.JointGrid(data=tips, x="total_bill", y="tip")
g.plot(sns.scatterplot, sns.histplot)

# PairGrid for multiple relationships
g = sns.PairGrid(tips, vars=["total_bill", "tip", "size"])
g.map_upper(sns.scatterplot)
g.map_lower(sns.kdeplot)
g.map_diag(sns.histplot)

Essential Seaborn Workflow

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Set up the aesthetics
sns.set_theme(style="whitegrid", context="talk")

# 2. Load or prepare data
df = sns.load_dataset("tips")

# 3. Create plots
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Distribution plots
sns.histplot(df["total_bill"], ax=axes[0,0])
axes[0,0].set_title("Bill Distribution")

# Categorical plots
sns.boxplot(data=df, x="day", y="total_bill", ax=axes[0,1])
axes[0,1].set_title("Bills by Day")

# Scatter plots
sns.scatterplot(data=df, x="total_bill", y="tip", hue="time", ax=axes[1,0])
axes[1,0].set_title("Bills vs Tips")

# Correlation
df_numeric = df.select_dtypes(include="number")
correlation_matrix = df_numeric.corr()
sns.heatmap(correlation_matrix, ax=axes[1,1], annot=True, cmap="coolwarm")
axes[1,1].set_title("Correlation Matrix")

plt.tight_layout()
plt.show()

Best Practices

Common Mistakes to Avoid

Performance Tips

# For large datasets, use sampling or aggregation
df_sample = df.sample(n=1000)

# Use vectorized operations
df["calculated_column"] = df["col1"] * df["col2"]

# Avoid for loops in favor of vectorized operations
# Good: df["new_col"] = df["col"] * 2
# Bad: for i in range(len(df)): df.loc[i, "new_col"] = df.loc[i, "col"] * 2

Seaborn excels at creating beautiful statistical visualizations with minimal code. By understanding its core functions and customization capabilities, you can create publication-quality plots quickly and effectively.

Updated: January 15, 2025
Author: Danial Pahlavan
Category: Data Science & Visualization