Member-only story
The Question Of Order And Chaos
Data, How Are You Related?
12 min read · Apr 13, 2023
Life goes in cycles, it seems to mess up, but it ends perfectly well-rounded. — Re: 4 circles, each delineated with squares, 1 encircling the other, 2011–10.27
To detect whether two data sets are correlated, we can use a few summary statistics as the decision logic.
# This illustrates classifying two data sets as positively correlated, negatively correlated, or uncorrelated.
from numpy import mean
from numpy import std
from numpy.random import randn
from numpy.random import seed
from matplotlib import pyplot
from scipy.stats import pearsonr # import the correlation function
from scipy.stats import linregress


def compute_statistics_correlation_and_plot(data1, data2):
    """Summarize two data sets, report their Pearson correlation, and plot them.

    Prints the mean and standard deviation of each data set (as computed by
    ``numpy.mean`` and ``numpy.std``), prints Pearson's correlation
    coefficient between them, and then displays a scatter plot of
    ``data1`` (x-axis) against ``data2`` (y-axis).

    Args:
        data1: Numeric sequence or array plotted on the x-axis.
        data2: Numeric sequence or array plotted on the y-axis. It is paired
            element-wise with ``data1`` by ``pearsonr`` and ``scatter``, so it
            is expected to have the same length as ``data1``.

    Returns:
        None. Results are reported via ``print`` and a matplotlib window.
    """
    # summarize
    print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
    print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

    # calculate Pearson's correlation; the second value returned by
    # pearsonr (the p-value) is not used here
    corr, _ = pearsonr(data1, data2)
    print('Pearsons correlation: %.3f' % corr)

    # plot the data
    pyplot.scatter(data1, data2)
    pyplot.title('Scatter plot of data1 vs data2')
    pyplot.xlabel('data1')
    pyplot.ylabel('data2')

    # show the plot
    pyplot.show()