Basic Charting with Matplotlib (Scatterplot, Barchart, Lineplot)

In Python, we can plot data with matplotlib.

Basic Plotting with matplotlib

You can show matplotlib figures directly in the notebook with either the %matplotlib notebook or the %matplotlib inline magic command.

%matplotlib notebook provides an interactive environment, while %matplotlib inline (used below) embeds each figure as a static image.

%matplotlib inline

import matplotlib.pyplot as plt

# default size applied to every figure created from here on: (14, 8)
plt.rc('figure', figsize=(14.0, 8.0))

# start a fresh figure
plt.figure()

# the two points to draw, as (x, y) pairs
first_point = (3, 2)
second_point = (4, 7)

# draw the first point with the circle marker
plt.plot(*first_point, 'o')

# fix the visible range: x from 0 to 6, y from 0 to 10
axis_limits = [0, 6, 0, 10]
plt.axis(axis_limits)

# draw the second point with the circle marker
plt.plot(*second_point, 'o')

# render the figure
plt.show()

png

Scatterplots

import numpy as np

# the same eight values, 1 through 8, on both axes
x = np.arange(1, 9)
y = np.copy(x)

# one color per point: five red, then green, blue and yellow
colors = ['r'] * 5 + ['g', 'b', 'y']

plt.figure()
# draw every point at size 100 in its assigned color
plt.scatter(x, y, s=100, c=colors)
plt.show()

png

x = list(range(1, 6))
y = list(range(6, 11))

plt.figure()

# each data series: which elements of x and y it takes, its color, and its legend label
# (the first two elements are 'Tall students', the remaining three are 'Short students')
series_specs = (
    (slice(None, 2), 'red', 'Tall students'),
    (slice(2, None), 'blue', 'Short students'),
)
for point_slice, series_color, series_label in series_specs:
    plt.scatter(x[point_slice], y[point_slice], s=100, c=series_color, label=series_label)

# describe the figure: title first, then the x and y axis labels
plt.title('Relationship between ball kicking and grades')
plt.xlabel('The number of times the child kicked a ball')
plt.ylabel('The grade of the student')

# the legend picks up the labels passed to plt.scatter
plt.legend()

plt.show()

png

Line Plots

linear_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12])
# NOTE: despite its name, this series is the element-wise square of linear_data
exponential_data = np.square(linear_data)

plt.figure()

# draw both series as solid lines with circle markers, one call per series
plt.plot(linear_data, '-o')
plt.plot(exponential_data, '-o')

# a third series, drawn as a dashed red line
plt.plot([22, 44, 55], '--r')

plt.title('A title')
plt.xlabel('Some data')
plt.ylabel('Some other data')

# the series were plotted without labels, so the legend entries are supplied here in plot order
plt.legend(['Baseline', 'Competition', 'Us'])

# shade the region between the two series in translucent blue;
# the x positions are the element indices, as used implicitly by the plot calls above
point_positions = np.arange(linear_data.size)
plt.fill_between(point_positions, linear_data, exponential_data, facecolor='blue', alpha=0.25)
plt.show()

png

Datetime as index

plt.figure()
# daily dates starting at first_day and stopping before end_day
first_day = '2017-01-01'
end_day = '2017-01-12'
observation_dates = np.arange(first_day, end_day, dtype='datetime64[D]')
# plot each series against the dates as a solid line with circle markers
plt.plot(observation_dates, linear_data, '-o')
plt.plot(observation_dates, exponential_data, '-o')
plt.show()

png

Using Pandas

import pandas as pd

plt.figure()
# build the daily numpy dates, then convert each one with pandas before plotting
daily_dates = np.arange('2017-01-01', '2017-01-12', dtype='datetime64[D]')
observation_dates = [pd.to_datetime(day) for day in daily_dates]
# plot each series against the converted dates as a solid line with circle markers
plt.plot(observation_dates, linear_data, '-o')
plt.plot(observation_dates, exponential_data, '-o')
# the title uses $...$ so that x^2 and x are rendered as math text
plt.title("Exponential ($x^2$) vs. Linear ($x$) performance")
plt.xlabel('Date')
plt.ylabel('Units')
plt.show()

png

Bar Charts

plt.figure()

# x positions of the first set of bars: one per element of linear_data
xvals = range(len(linear_data))
# a single width shared by both sets; the second set is shifted by exactly this
# amount, so the two sets sit flush against each other instead of overlapping
bar_width = 0.3

# first set of bars: the linear data in blue, each with an error bar of 4
plt.bar(xvals, linear_data, width=bar_width, yerr=4, color='blue')

# second set of bars: shift the x positions right by one bar width to make up
# for the first set already plotted, then draw the second series in red
new_xvals = [i + bar_width for i in xvals]
plt.bar(new_xvals, exponential_data, width=bar_width, color='red')
plt.show()

png

# bars can also be stacked on top of one another
plt.figure()
xvals = range(len(linear_data))
# keyword arguments common to both layers
shared_style = {'width': 0.3}
# lower layer: the linear data in blue
plt.bar(xvals, linear_data, color='b', **shared_style)
# upper layer: the second series in red, each bar starting at the top of the blue bar
plt.bar(xvals, exponential_data, bottom=linear_data, color='r', **shared_style)
plt.show()

png

# barh draws the same kind of chart with horizontal bars
plt.figure()
xvals = range(len(linear_data))
# thickness shared by both layers
bar_height = 0.3
# first layer: the linear data in blue
plt.barh(xvals, linear_data, height=bar_height, color='b')
# second layer: the second series in red, each bar starting at the end of the blue bar
plt.barh(xvals, exponential_data, height=bar_height, left=linear_data, color='r')
plt.show()

png

Leave a Comment