Source code for EDAhelper.plot_histogram

import pandas as pd
import altair as alt


def plot_histogram(data, columns=["all"], num_bins=30):
    """
    Creates histograms for numerical features within a dataframe using Altair.

    Parameters
    ----------
    data : pd.DataFrame
        A pandas dataframe
    columns : list, default=["all"]
        A list of numerical features for which to create histograms. The
        default, ``["all"]``, plots every numeric column found in ``data``.
        Every explicitly listed name must be a column of ``data``.
    num_bins : integer, default=30
        Upper bound on the number of bins in each histogram (forwarded to
        Altair as ``maxbins``). Must be a positive integer.

    Returns
    -------
    plot : Altair chart object
        A repeated chart holding one 200x200 histogram per selected
        feature, laid out three per row.

    Raises
    ------
    TypeError
        If ``data`` is not a pandas DataFrame, ``columns`` is not a list,
        or ``num_bins`` is not an integer (booleans are rejected).
    ValueError
        If ``num_bins`` is not positive, or if ``columns`` names a column
        that does not exist in ``data``.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2, 3],
    ...                    'B': [1.5, 2.5, 3.5],
    ...                    'C': ['one', 'two', 'three']})
    >>> plot_histogram(df, columns=['A', 'B'])
    """
    # NOTE: the list default is kept for interface compatibility; it is only
    # ever read (and copied below), never mutated, so sharing it is safe.

    # Check inputs
    if not isinstance(data, pd.DataFrame):
        raise TypeError("'data' should be a pandas.DataFrame object")

    if not isinstance(columns, list):
        raise TypeError("'columns' should be a list object")

    # bool is a subclass of int, so it has to be excluded explicitly;
    # otherwise True/False would silently be used as a bin count.
    if isinstance(num_bins, bool) or not isinstance(num_bins, int):
        raise TypeError("'num_bins' should be an integer")

    if num_bins < 1:
        raise ValueError("'num_bins' should be a positive integer")

    if columns == ["all"]:
        numeric_cols = list(data.select_dtypes("number"))
    else:
        # Fail early with a clear message instead of handing unknown field
        # names to Altair.
        missing = [col for col in columns if col not in data.columns]
        if missing:
            raise ValueError(
                f"'columns' contains names not found in 'data': {missing}"
            )
        # Copy so the caller's list is never shared with the chart spec.
        numeric_cols = list(columns)

    # One binned bar chart template; alt.repeat() is substituted with each
    # entry of numeric_cols when the chart is repeated below.
    x_axis = alt.X(
        alt.repeat(),
        type="quantitative",
        bin=alt.Bin(maxbins=num_bins),
    )
    base = (
        alt.Chart(data)
        .mark_bar()
        .encode(x=x_axis, y=alt.Y("count()"))
        .properties(height=200, width=200)
    )
    plot = base.repeat(repeat=numeric_cols, columns=3)

    return plot