Seaborn

Installation

# Basic installation
pip install seaborn

# With all optional dependencies
pip install seaborn[all]

# Development version
pip install git+https://github.com/mwaskom/seaborn.git

# Check version
python -c "import seaborn as sns; print(sns.__version__)"

# List available datasets
python -c "import seaborn as sns; print(sns.get_dataset_names())"

Basic Setup

# Essential imports
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Apply default theme
sns.set_theme()

# Alternative: set specific style
sns.set_theme(style="whitegrid", palette="pastel")

# Load sample dataset
tips = sns.load_dataset("tips")
flights = sns.load_dataset("flights")
iris = sns.load_dataset("iris")
penguins = sns.load_dataset("penguins")

Core Functionality

Built-in Datasets

# Available datasets
dataset_names = sns.get_dataset_names()
print(dataset_names)

# Load specific datasets
tips = sns.load_dataset("tips")           # Restaurant tips
flights = sns.load_dataset("flights")     # Airline passenger data
iris = sns.load_dataset("iris")           # Iris flower measurements
penguins = sns.load_dataset("penguins")   # Palmer penguin data
mpg = sns.load_dataset("mpg")             # Car fuel efficiency
titanic = sns.load_dataset("titanic")     # Titanic passenger data
diamonds = sns.load_dataset("diamonds")   # Diamond characteristics
fmri = sns.load_dataset("fmri")           # fMRI brain imaging data

# Explore dataset structure
print(tips.head())
print(tips.info())
print(tips.describe())

Figure-Level vs Axes-Level Functions

# Figure-level functions (create entire figure with subplots)
sns.relplot()     # Relationships (scatter, line)
sns.displot()     # Distributions (hist, kde, ecdf)
sns.catplot()     # Categorical (bar, box, violin, etc.)
sns.lmplot()      # Linear model fits
sns.FacetGrid()   # General-purpose faceting

# Axes-level functions (work with matplotlib axes)
sns.scatterplot() # Scatter plot
sns.lineplot()    # Line plot
sns.histplot()    # Histogram
sns.kdeplot()     # Kernel density estimate
sns.boxplot()     # Box plot
sns.barplot()     # Bar plot
sns.heatmap()     # Heat map

Relational Plots

Scatter Plots

# Basic scatter plot
sns.scatterplot(data=tips, x="total_bill", y="tip")

# With categorical encoding
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time", style="smoker")

# With size encoding
sns.scatterplot(data=tips, x="total_bill", y="tip", size="size", hue="time")

# Figure-level with faceting
sns.relplot(data=tips, x="total_bill", y="tip", 
            col="time", hue="smoker", style="smoker")

# Advanced customization
sns.relplot(data=tips, x="total_bill", y="tip",
            hue="time", size="size", style="sex",
            palette=["blue", "red"], sizes=(20, 200),
            height=5, aspect=1.2)

Line Plots

# Basic line plot
fmri = sns.load_dataset("fmri")
sns.lineplot(data=fmri, x="timepoint", y="signal")

# With confidence intervals
sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event")

# Multiple grouping variables
sns.lineplot(data=fmri, x="timepoint", y="signal", 
             hue="region", style="event")

# Figure-level line plots with faceting
sns.relplot(data=fmri, kind="line",
            x="timepoint", y="signal", 
            col="region", hue="event", style="event")

# Time series with dates
flights_wide = flights.pivot(index="year", columns="month", values="passengers")
flights_wide.index = pd.date_range("1949", periods=12, freq="AS")
sns.lineplot(data=flights_wide.T)

Distribution Plots

Histograms

# Basic histogram
sns.histplot(data=penguins, x="flipper_length_mm")

# With grouping
sns.histplot(data=penguins, x="flipper_length_mm", hue="species")

# Stacked histogram
sns.histplot(data=penguins, x="flipper_length_mm", hue="species", multiple="stack")

# Density histogram
sns.histplot(data=penguins, x="flipper_length_mm", stat="density")

# 2D histogram
sns.histplot(data=penguins, x="flipper_length_mm", y="bill_length_mm")

# Figure-level distributions
sns.displot(data=penguins, x="flipper_length_mm", col="species")
sns.displot(data=penguins, x="flipper_length_mm", hue="species", kind="kde")

KDE Plots

# Basic KDE
sns.kdeplot(data=penguins, x="flipper_length_mm")

# Multiple distributions
sns.kdeplot(data=penguins, x="flipper_length_mm", hue="species")

# Filled KDE
sns.kdeplot(data=penguins, x="flipper_length_mm", hue="species", fill=True)

# 2D KDE
sns.kdeplot(data=penguins, x="flipper_length_mm", y="bill_length_mm")

# Bivariate with contours
sns.kdeplot(data=penguins, x="flipper_length_mm", y="bill_length_mm", 
            levels=5, thresh=0.1)

# Combined histogram and KDE
sns.histplot(data=penguins, x="flipper_length_mm", kde=True)

ECDF Plots

# Empirical Cumulative Distribution Function
sns.ecdfplot(data=penguins, x="flipper_length_mm")

# With grouping
sns.ecdfplot(data=penguins, x="flipper_length_mm", hue="species")

# Complementary ECDF
sns.ecdfplot(data=penguins, x="flipper_length_mm", complementary=True)

# Figure-level ECDF
sns.displot(data=penguins, x="flipper_length_mm", kind="ecdf", 
            col="species", height=4)

Categorical Plots

Bar Plots

# Basic bar plot (shows mean with confidence interval)
sns.barplot(data=tips, x="day", y="total_bill")

# With grouping
sns.barplot(data=tips, x="day", y="total_bill", hue="time")

# Different estimator
sns.barplot(data=tips, x="day", y="total_bill", estimator=np.median)

# Count plot (frequency of categories)
sns.countplot(data=tips, x="day")
sns.countplot(data=tips, x="day", hue="time")

# Horizontal bar plot
sns.barplot(data=tips, x="total_bill", y="day", orient="h")

Box and Violin Plots

# Box plots
sns.boxplot(data=tips, x="day", y="total_bill")
sns.boxplot(data=tips, x="day", y="total_bill", hue="smoker")

# Violin plots
sns.violinplot(data=tips, x="day", y="total_bill")
sns.violinplot(data=tips, x="day", y="total_bill", hue="smoker", split=True)

# Box plot with strip plot overlay
sns.boxplot(data=tips, x="day", y="total_bill", color="lightgray")
sns.stripplot(data=tips, x="day", y="total_bill", size=4, jitter=True)

Point and Strip Plots

# Strip plot (categorical scatter)
sns.stripplot(data=tips, x="day", y="total_bill")

# Swarm plot (non-overlapping points)
sns.swarmplot(data=tips, x="day", y="total_bill")

# Point plot (connect means)
sns.pointplot(data=tips, x="day", y="total_bill", hue="time")

# Figure-level categorical plots
sns.catplot(data=tips, x="day", y="total_bill", kind="violin", 
            col="time", hue="smoker")

sns.catplot(data=tips, x="day", y="total_bill", kind="swarm", 
            row="time", col="sex")

Statistical Visualizations

Regression Plots

# Simple linear regression
sns.regplot(data=tips, x="total_bill", y="tip")

# Without regression line
sns.regplot(data=tips, x="total_bill", y="tip", fit_reg=False)

# Different regression order
sns.regplot(data=tips, x="total_bill", y="tip", order=2)

# Logistic regression
sns.regplot(data=tips, x="total_bill", y="tip", logistic=True)

# Linear model plot with faceting
sns.lmplot(data=tips, x="total_bill", y="tip", col="time", hue="smoker")

# Residual plots
sns.residplot(data=tips, x="total_bill", y="tip")

Pair Plots

# Pairwise relationships
sns.pairplot(data=iris)

# With categorical encoding
sns.pairplot(data=iris, hue="species")

# Subset of variables
sns.pairplot(data=iris, vars=["sepal_length", "sepal_width"], hue="species")

# Different plot types on diagonal
sns.pairplot(data=iris, hue="species", diag_kind="kde")

# Custom plot types
sns.pairplot(data=iris, hue="species", 
             plot_kws={"alpha": 0.6}, diag_kws={"shade": True})

Joint Plots

# Basic joint plot
sns.jointplot(data=tips, x="total_bill", y="tip")

# Different plot types
sns.jointplot(data=tips, x="total_bill", y="tip", kind="reg")
sns.jointplot(data=tips, x="total_bill", y="tip", kind="hex")
sns.jointplot(data=tips, x="total_bill", y="tip", kind="kde")

# With categorical data
sns.jointplot(data=penguins, x="flipper_length_mm", y="bill_length_mm", 
              hue="species")

# Custom marginal plots
g = sns.jointplot(data=penguins, x="flipper_length_mm", y="bill_length_mm")
g.plot_joint(sns.kdeplot, color="r", zorder=0, levels=6)
g.plot_marginals(sns.rugplot, color="r", height=-0.15)

Multi-plot Grids

FacetGrid

# Create FacetGrid
g = sns.FacetGrid(tips, col="time", row="smoker", margin_titles=True)

# Map function to each facet
g.map(sns.scatterplot, "total_bill", "tip", alpha=0.7)
g.add_legend()

# Different functions for different positions
g = sns.FacetGrid(tips, col="time", hue="smoker")
g.map(plt.scatter, "total_bill", "tip", alpha=0.7)
g.add_legend()

# Custom function
def scatter_with_corr(x, y, **kwargs):
    ax = plt.gca()
    corr = np.corrcoef(x, y)[0, 1]
    ax.annotate(f'r = {corr:.2f}', xy=(0.1, 0.9), xycoords=ax.transAxes)
    ax.scatter(x, y, **kwargs)

g = sns.FacetGrid(tips, col="time")
g.map(scatter_with_corr, "total_bill", "tip")

PairGrid

# Create PairGrid
g = sns.PairGrid(iris, hue="species")

# Map different plot types
g.map_diag(sns.histplot)
g.map_offdiag(sns.scatterplot)
g.add_legend()

# Different plots for upper and lower triangles
g = sns.PairGrid(iris)
g.map_upper(sns.scatterplot)
g.map_lower(sns.kdeplot, fill=True)
g.map_diag(sns.histplot, kde=True)

JointGrid

# Create JointGrid
g = sns.JointGrid(data=penguins, x="flipper_length_mm", y="bill_length_mm")

# Add plots
g.plot(sns.scatterplot, sns.histplot)

# Custom styling
g = sns.JointGrid(data=penguins, x="flipper_length_mm", y="bill_length_mm")
g.plot(sns.scatterplot, sns.histplot, alpha=0.7, edgecolor=".2", linewidth=0.5)

# Mixed plot types
g = sns.JointGrid(data=penguins, x="flipper_length_mm", y="bill_length_mm")
g.plot(sns.regplot, sns.boxplot)

Heat Maps

Basic Heat Maps

# Correlation matrix
corr = tips.corr(numeric_only=True)
sns.heatmap(corr)

# With annotations
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0)

# Custom formatting
sns.heatmap(corr, annot=True, fmt='.2f', square=True, 
            linewidths=0.5, cbar_kws={"shrink": 0.8})

# Pivot table heatmap
flights_pivot = flights.pivot(index="month", columns="year", values="passengers")
sns.heatmap(flights_pivot, cmap="YlOrRd")

Cluster Map

# Hierarchical clustering
iris_num = iris.select_dtypes(include=[np.number])
sns.clustermap(iris_num, cmap='viridis', standard_scale=1)

# With annotations
sns.clustermap(corr, annot=True, cmap='RdBu_r', center=0)

# Control clustering
sns.clustermap(flights_pivot, col_cluster=False, cmap='Blues')

Styling and Themes

Built-in Themes

# Available styles
print(sns.axes_style.__doc__)

# Set different styles
styles = ['darkgrid', 'whitegrid', 'dark', 'white', 'ticks']

for style in styles:
    sns.set_theme(style=style)
    plt.figure(figsize=(6, 4))
    sns.lineplot(data=fmri.query('region=="frontal"'), 
                 x="timepoint", y="signal", hue="event")
    plt.title(f'Style: {style}')
    plt.show()

Color Palettes

# Qualitative palettes
sns.color_palette("deep")          # Default seaborn colors
sns.color_palette("muted")         # Muted version
sns.color_palette("bright")        # Bright version
sns.color_palette("pastel")        # Pastel version
sns.color_palette("dark")          # Dark version

# Sequential palettes
sns.color_palette("Blues")         # Single hue
sns.color_palette("viridis")       # Perceptually uniform
sns.color_palette("rocket")        # Seaborn sequential

# Diverging palettes
sns.color_palette("RdBu")          # Red-Blue diverging
sns.color_palette("coolwarm")      # Cool-warm
sns.color_palette("vlag")          # Seaborn diverging

# Custom palettes
colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7"]
sns.set_palette(colors)

# Using palettes in plots
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time", 
                palette="viridis")

Contexts (Scaling)

# Available contexts
contexts = ['paper', 'notebook', 'talk', 'poster']

for context in contexts:
    sns.set_context(context)
    plt.figure(figsize=(8, 6))
    sns.boxplot(data=tips, x="day", y="total_bill")
    plt.title(f'Context: {context}')
    plt.show()

# Custom scaling
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

Custom Styling

# Custom theme dictionary
custom_theme = {
    "axes.spines.right": False,
    "axes.spines.top": False,
    "axes.grid": True,
    "axes.grid.alpha": 0.3,
    "grid.linewidth": 0.8,
    "font.family": ["serif"],
    "font.size": 12
}

# Apply custom theme
with sns.axes_style(custom_theme):
    sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time")

# Persistent custom theme
sns.set_theme(rc=custom_theme)

Despining

# Remove spines
sns.despine()                      # Remove top and right
sns.despine(left=True)            # Also remove left
sns.despine(offset=10)            # Offset spines
sns.despine(trim=True)            # Trim spines to data range

# In context
plt.figure(figsize=(8, 6))
sns.boxplot(data=tips, x="day", y="total_bill")
sns.despine(offset=5, trim=True)

Advanced Features

Custom Color Maps and Normalization

# Custom discrete palette
from matplotlib.colors import ListedColormap
custom_colors = ["#FF6B6B", "#4ECDC4", "#45B7D1"]
custom_cmap = ListedColormap(custom_colors)

# Use in heatmap
sns.heatmap(flights_pivot, cmap=custom_cmap)

# Color normalization
from matplotlib.colors import LogNorm, PowerNorm

# Log normalization for highly skewed data
sns.heatmap(flights_pivot, norm=LogNorm())

# Power normalization
sns.heatmap(flights_pivot, norm=PowerNorm(gamma=0.5))

Statistical Annotations

# Add statistical annotations manually
from scipy import stats

fig, ax = plt.subplots()
sns.boxplot(data=tips, x="time", y="total_bill", ax=ax)

# Perform statistical test
lunch_bills = tips[tips['time'] == 'Lunch']['total_bill']
dinner_bills = tips[tips['time'] == 'Dinner']['total_bill']
t_stat, p_value = stats.ttest_ind(lunch_bills, dinner_bills)

# Add annotation
ax.text(0.5, 0.95, f'p-value: {p_value:.4f}', 
        transform=ax.transAxes, ha='center', va='top',
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

Interactive Elements

# Using matplotlib widgets with seaborn
from matplotlib.widgets import CheckButtons

# Create plot
fig, ax = plt.subplots(figsize=(10, 6))
species_list = penguins['species'].unique()
lines = []

for species in species_list:
    data = penguins[penguins['species'] == species]
    line = ax.scatter(data['flipper_length_mm'], data['bill_length_mm'], 
                     label=species, alpha=0.7)
    lines.append(line)

# Add checkboxes
rax = plt.axes([0.02, 0.5, 0.15, 0.15])
check = CheckButtons(rax, species_list, [True] * len(species_list))

def toggle_species(label):
    index = species_list.tolist().index(label)
    lines[index].set_visible(not lines[index].get_visible())
    plt.draw()

check.on_clicked(toggle_species)
ax.legend()
plt.show()

Integration with Other Libraries

Pandas Integration

# Direct pandas plotting with seaborn style
sns.set_theme()
tips.plot(x='total_bill', y='tip', kind='scatter')

# Using pandas groupby with seaborn
grouped_data = tips.groupby(['day', 'time'])['total_bill'].mean().reset_index()
sns.barplot(data=grouped_data, x='day', y='total_bill', hue='time')

# Melting data for seaborn
tips_long = pd.melt(tips, id_vars=['time', 'day'], 
                   value_vars=['total_bill', 'tip'])
sns.boxplot(data=tips_long, x='variable', y='value', hue='time')

Statistical Testing Integration

from scipy.stats import ttest_ind
import statannotations.stats as stats_annotations

# Statistical annotations on plots
ax = sns.boxplot(data=tips, x='time', y='total_bill')

# Add significance testing
box_pairs = [('Lunch', 'Dinner')]
annotator = stats_annotations.Annotator(ax, box_pairs, data=tips, 
                                       x='time', y='total_bill')
annotator.configure(test='t-test_ind', text_format='star')
annotator.apply_and_annotate()

Machine Learning Integration

from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Load data
digits = load_digits()
X, y = digits.data, digits.target

# PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# K-means clustering
kmeans = KMeans(n_clusters=10, random_state=42)
clusters = kmeans.fit_predict(X)

# Create DataFrame for seaborn
df_ml = pd.DataFrame({
    'PC1': X_pca[:, 0],
    'PC2': X_pca[:, 1],
    'True_Label': y,
    'Cluster': clusters
})

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

sns.scatterplot(data=df_ml, x='PC1', y='PC2', hue='True_Label', 
                palette='tab10', ax=axes[0])
axes[0].set_title('True Labels')

sns.scatterplot(data=df_ml, x='PC1', y='PC2', hue='Cluster', 
                palette='tab10', ax=axes[1])
axes[1].set_title('K-means Clusters')

plt.tight_layout()

Common Use Cases

Exploratory Data Analysis

def explore_dataset(df, target_column=None):
    """Comprehensive EDA function using seaborn"""

    # Dataset overview
    print(f"Dataset shape: {df.shape}")
    print(f"Missing values:\n{df.isnull().sum()}")

    # Numeric columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns

    # Categorical columns
    cat_cols = df.select_dtypes(include=['object', 'category']).columns

    # Correlation heatmap
    if len(numeric_cols) > 1:
        plt.figure(figsize=(10, 8))
        sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm', center=0)
        plt.title('Correlation Matrix')
        plt.tight_layout()
        plt.show()

    # Distribution plots
    if len(numeric_cols) > 0:
        n_cols = min(3, len(numeric_cols))
        n_rows = (len(numeric_cols) + n_cols - 1) // n_cols

        fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5*n_rows))
        axes = axes.flatten() if n_rows > 1 else [axes]

        for i, col in enumerate(numeric_cols):
            if target_column and target_column in df.columns:
                sns.histplot(data=df, x=col, hue=target_column, ax=axes[i], kde=True)
            else:
                sns.histplot(data=df, x=col, ax=axes[i], kde=True)
            axes[i].set_title(f'Distribution of {col}')

        plt.tight_layout()
        plt.show()

    # Pairplot
    if len(numeric_cols) > 1 and len(numeric_cols) <= 6:
        if target_column and target_column in df.columns:
            sns.pairplot(df, vars=numeric_cols, hue=target_column)
        else:
            sns.pairplot(df[numeric_cols])
        plt.show()

# Usage
explore_dataset(tips, target_column='time')

Time Series Visualization

# Prepare time series data
np.random.seed(42)
dates = pd.date_range('2020-01-01', periods=365, freq='D')
values = np.cumsum(np.random.randn(365)) + 100
ts_data = pd.DataFrame({'date': dates, 'value': values})
ts_data['month'] = ts_data['date'].dt.month
ts_data['day_of_week'] = ts_data['date'].dt.day_name()

# Time series plots
fig, axes = plt.subplots(3, 1, figsize=(15, 12))

# Line plot
sns.lineplot(data=ts_data, x='date', y='value', ax=axes[0])
axes[0].set_title('Time Series')

# Monthly boxplot
sns.boxplot(data=ts_data, x='month', y='value', ax=axes[1])
axes[1].set_title('Monthly Distribution')

# Day of week pattern
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
sns.boxplot(data=ts_data, x='day_of_week', y='value', order=day_order, ax=axes[2])
axes[2].set_title('Day of Week Pattern')
axes[2].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

Scientific Publication Plots

def publication_plot():
    """Create publication-ready plots"""

    # Set publication style
    sns.set_theme(style="white", context="paper", font_scale=1.2)

    # Create figure with specific size (for journal requirements)
    fig = plt.figure(figsize=(8.5, 11))  # US Letter size

    # Multiple subplots
    gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 1.2])

    # Plot A: Scatter with regression
    ax1 = fig.add_subplot(gs[0, 0])
    sns.scatterplot(data=tips, x="total_bill", y="tip", alpha=0.6, ax=ax1)
    sns.regplot(data=tips, x="total_bill", y="tip", scatter=False, 
                color='red', ax=ax1)
    ax1.set_title('A', fontweight='bold', loc='left')
    ax1.set_xlabel('Total Bill ($)')
    ax1.set_ylabel('Tip ($)')

    # Plot B: Box plot with statistics
    ax2 = fig.add_subplot(gs[0, 1])
    sns.boxplot(data=tips, x="time", y="total_bill", ax=ax2)
    ax2.set_title('B', fontweight='bold', loc='left')
    ax2.set_xlabel('Time')
    ax2.set_ylabel('Total Bill ($)')

    # Plot C: Violin plot
    ax3 = fig.add_subplot(gs[1, :])
    sns.violinplot(data=tips, x="day", y="total_bill", hue="time", ax=ax3)
    ax3.set_title('C', fontweight='bold', loc='left')
    ax3.set_xlabel('Day of Week')
    ax3.set_ylabel('Total Bill ($)')

    # Plot D: Correlation heatmap
    ax4 = fig.add_subplot(gs[2, :])
    corr = tips.select_dtypes(include=[np.number]).corr()
    sns.heatmap(corr, annot=True, cmap='RdBu_r', center=0, ax=ax4,
                square=True, fmt='.2f')
    ax4.set_title('D', fontweight='bold', loc='left')

    # Remove spines for cleaner look
    for ax in [ax1, ax2, ax3]:
        sns.despine(ax=ax)

    plt.tight_layout()
    return fig

# Create and save publication plot
fig = publication_plot()
fig.savefig('publication_plot.pdf', dpi=300, bbox_inches='tight', 
            facecolor='white', edgecolor='none')
plt.show()

Best Practices

Performance Tips

# Use appropriate figure sizes
sns.set_context("notebook")  # Instead of making everything larger

# Efficient color palettes
# Good: Use built-in palettes
sns.set_palette("husl")

# Avoid: Creating custom palettes repeatedly in loops
# for i in range(100):
#     custom_palette = ["#FF6B6B", "#4ECDC4", "#45B7D1"]  # Inefficient

# Batch processing for multiple plots
def create_multiple_plots(data, columns):
    """Efficiently create multiple plots"""
    n_cols = len(columns)
    fig, axes = plt.subplots(1, n_cols, figsize=(5*n_cols, 5))

    for i, col in enumerate(columns):
        ax = axes[i] if n_cols > 1 else axes
        sns.histplot(data=data, x=col, ax=ax)
        ax.set_title(col)

    plt.tight_layout()
    return fig

Memory Management

# For large datasets, sample data
def plot_large_dataset(df, sample_size=10000):
    """Handle large datasets efficiently"""
    if len(df) > sample_size:
        df_sample = df.sample(n=sample_size, random_state=42)
        print(f"Sampling {sample_size} rows from {len(df)} total rows")
    else:
        df_sample = df

    return sns.scatterplot(data=df_sample, x='x', y='y')

# Close figures to free memory
plt.close('all')  # Close all figures
plt.close(fig)    # Close specific figure

Aesthetic Consistency

# Create consistent style function
def set_publication_style():
    """Set consistent publication-ready style"""
    sns.set_theme(
        style="white",
        context="paper",
        font_scale=1.2,
        rc={
            "axes.spines.right": False,
            "axes.spines.top": False,
            "axes.grid": True,
            "axes.grid.alpha": 0.3,
            "figure.facecolor": "white",
            "axes.facecolor": "white"
        }
    )

# Use consistent color schemes
COLORS = {
    'primary': '#1f77b4',
    'secondary': '#ff7f0e',
    'success': '#2ca02c',
    'danger': '#d62728',
    'warning': '#ff7f0e',
    'info': '#17a2b8',
    'light': '#f8f9fa',
    'dark': '#343a40'
}

# Apply consistent colors
sns.scatterplot(data=tips, x="total_bill", y="tip", color=COLORS['primary'])

Troubleshooting Common Issues

Data Format Issues

# Ensure proper data types
def prepare_data_for_seaborn(df):
    """Prepare DataFrame for seaborn plotting"""
    df = df.copy()

    # Convert categorical variables
    for col in df.select_dtypes(include=['object']).columns:
        if df[col].nunique() < 10:  # Arbitrary threshold
            df[col] = df[col].astype('category')

    # Handle datetime columns
    datetime_cols = df.select_dtypes(include=['datetime64']).columns
    for col in datetime_cols:
        df[f'{col}_year'] = df[col].dt.year
        df[f'{col}_month'] = df[col].dt.month

    return df

# Handle missing values
def handle_missing_data(df, strategy='drop'):
    """Handle missing data for plotting"""
    if strategy == 'drop':
        return df.dropna()
    elif strategy == 'fill_numeric':
        df_clean = df.copy()
        numeric_cols = df_clean.select_dtypes(include=[np.number]).columns
        df_clean[numeric_cols] = df_clean[numeric_cols].fillna(df_clean[numeric_cols].mean())
        return df_clean
    return df

Plot Customization Issues

# Fix overlapping labels
def fix_overlapping_labels(ax):
    """Fix common label overlap issues"""
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()

# Handle legend issues
def fix_legend_issues(ax, title=None, loc='best'):
    """Standardize legend appearance"""
    legend = ax.legend(title=title, loc=loc, frameon=True, 
                      fancybox=True, shadow=True)
    legend.get_frame().set_facecolor('white')
    legend.get_frame().set_alpha(0.9)

# Consistent axis formatting
def format_axes(ax, xlabel=None, ylabel=None, title=None):
    """Apply consistent axis formatting"""
    if xlabel:
        ax.set_xlabel(xlabel, fontweight='bold')
    if ylabel:
        ax.set_ylabel(ylabel, fontweight='bold')
    if title:
        ax.set_title(title, fontweight='bold', pad=20)

    # Format tick labels
    ax.tick_params(axis='both', which='major', labelsize=10)

    return ax

Quick Reference

Essential Functions

Function	Purpose	Example
`sns.scatterplot()`	Scatter plot	`sns.scatterplot(data=df, x='x', y='y', hue='category')`
`sns.lineplot()`	Line plot	`sns.lineplot(data=df, x='time', y='value')`
`sns.histplot()`	Histogram	`sns.histplot(data=df, x='values', hue='group')`
`sns.boxplot()`	Box plot	`sns.boxplot(data=df, x='category', y='values')`
`sns.heatmap()`	Heat map	`sns.heatmap(df.corr(), annot=True)`
`sns.pairplot()`	Pair plot	`sns.pairplot(data=df, hue='species')`

Figure-Level Functions

Function	Axes-Level Equivalent	Use Case
`sns.relplot()`	`sns.scatterplot()`, `sns.lineplot()`	Relationships with faceting
`sns.displot()`	`sns.histplot()`, `sns.kdeplot()`, `sns.ecdfplot()`	Distributions with faceting
`sns.catplot()`	`sns.boxplot()`, `sns.violinplot()`, etc.	Categories with faceting
`sns.lmplot()`	`sns.regplot()`	Linear models with faceting

Common Parameters

Parameter	Purpose	Values
`data`	DataFrame	pandas DataFrame
`x`, `y`	Variables to plot	Column names
`hue`	Grouping variable (color)	Column name
`style`	Grouping variable (style)	Column name
`size`	Grouping variable (size)	Column name
`col`, `row`	Faceting variables	Column names
`palette`	Color palette	'viridis', 'Set1', custom list
`alpha`	Transparency	0.0 to 1.0

Color Palettes

Type	Examples	Use Case
Qualitative	'Set1', 'tab10', 'husl'	Categorical data
Sequential	'viridis', 'Blues', 'rocket'	Ordered data
Diverging	'RdBu', 'coolwarm', 'vlag'	Data with meaningful center

Styling Contexts

Context	Use Case	Relative Size
`paper`	Journal figures	Smallest
`notebook`	Jupyter notebooks	Default
`talk`	Presentations	Larger
`poster`	Conference posters	Largest