In this article, you will learn things that can otherwise only be learned through countless hours of study and practice.

# Load the housing dataset used throughout the examples.
houses = pd.read_csv('data/melb_data.csv')

# Calculate pairwise correlation on the numeric columns only.
# `houses` also holds string columns (e.g. CouncilArea); since pandas 2.0
# DataFrame.corr() no longer drops those silently and raises instead, so the
# numeric columns are selected explicitly (works on older pandas as well).
matrix = houses.select_dtypes(include='number').corr()

# Create a mask covering the upper triangle (diagonal included): the
# correlation matrix is symmetric, so that half only repeats the lower one.
mask = np.triu(np.ones_like(matrix, dtype=bool))

# Create a custom diverging palette
cmap = sns.diverging_palette(250, 15, s=75, l=40,
                             n=9, center="light", as_cmap=True)

plt.figure(figsize=(16, 12))

# center=0 anchors the middle of the diverging palette at zero correlation.
# The trailing semicolon is kept from the original (presumably a notebook
# idiom to suppress the returned Axes repr).
sns.heatmap(matrix, mask=mask, center=0, annot=True,
            fmt='.2f', square=True, cmap=cmap);

>>> np.ones_like(matrix, dtype=bool)[:5]

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True]])

# Draw the correlation matrix as an annotated heatmap, hiding the cells
# selected by `mask` and centring the colour scale on zero.
sns.heatmap(
    matrix,
    mask=mask,
    center=0,
    annot=True,
    fmt='.2f',
    square=True,
    cmap=cmap,
)

>>> houses.CouncilArea.value_counts(dropna=False, normalize=True).head()

NaN           0.100810
Moreland      0.085641
Boroondara    0.085420
Moonee Valley 0.073417
Darebin       0.068778
Name: CouncilArea, dtype: float64

>>> missing_props = houses.isna().sum() / len(houses)
>>> missing_props[missing_props > 0].sort_values(ascending=False)
BuildingArea 0.474963
YearBuilt    0.395803
CouncilArea  0.100810
Car          0.004566
dtype: float64

3. Pandas DataFrame Styler

>>> diamonds = sns.load_dataset('diamonds')

>>> pd.crosstab(diamonds.cut, diamonds.clarity).style

>>> pd.crosstab(diamonds.cut, diamonds.clarity,
...             aggfunc=np.mean, values=diamonds.price).style

>>> agg_prices = pd.crosstab(diamonds.cut, diamonds.clarity,
...                          aggfunc=np.mean, values=diamonds.price).style

>>> agg_prices.format('{:.2f}')

from matplotlib import rcParams


# Global matplotlib defaults applied to every figure created afterwards.

# Remove top and right spines
# (the original used an empty key '' here, which is not a valid rc parameter)
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False

# Set fixed figure size (width, height in inches)
rcParams['figure.figsize'] = [12, 9]

# Set dots per inch to 300, very high quality images
rcParams['figure.dpi'] = 300

# Enable autolayout
rcParams['figure.autolayout'] = True

# Set global fontsize
# (the original used an empty key '' here as well)
rcParams['font.size'] = 16

# Fontsize of ticklabels
rcParams['xtick.labelsize'] = 10
rcParams['ytick.labelsize'] = 10

  • get_option() / set_option()

  • reset_option()

  • describe_option()

  • option_context()

>>> pd.get_option('display.max_columns')

>>> pd.set_option('display.max_rows', 5)
>>> houses


# Switch the pandas plotting backend to plotly.
# (The original line used typographic quotes, which are a SyntaxError.)
pd.set_option('plotting.backend', 'plotly')

>>> df = pd.DataFrame(np.random.randn(5, 5))
>>> pd.reset_option('display.max_rows')
>>> with pd.option_context('float_format', '{:f}'.format):


