The Question Of Order And Chaos

Data, How Are You Related?

Mi'kail Eli'yah
12 min read · Apr 13

--

Life goes in cycles, it seems to mess up, but it ends perfectly well-rounded. — Re: 4 circles, each delineated with squares, 1 encircling the other, 2011–10.27

To detect whether two data sets are correlated, we can use a statistic such as Pearson's correlation coefficient as the decision criterion.

# This illustrates classifying two data sets as positively correlated, negatively correlated, or uncorrelated.
from numpy import mean
from numpy import std
from numpy.random import randn
from numpy.random import seed
from matplotlib import pyplot
from scipy.stats import pearsonr # import the correlation function
from scipy.stats import linregress
def compute_statistics_correlation_and_plot(data1, data2):
    """Print summary statistics and Pearson's correlation, then scatter-plot the data.

    Args:
        data1: First sample; plotted on the x-axis.
        data2: Second sample; plotted on the y-axis. It is passed to
            ``pearsonr`` together with ``data1``.

    Returns:
        None. The statistics are printed to stdout and the figure is
        displayed with ``pyplot.show()``.
    """
    # summarize each sample with its mean and standard deviation
    print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
    print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

    # calculate Pearson's correlation; the second returned value is not used here
    corr, _ = pearsonr(data1, data2)
    print('Pearsons correlation: %.3f' % corr)

    # plot the data
    pyplot.scatter(data1, data2)
    pyplot.title('Scatter plot of data1 vs data2')
    pyplot.xlabel('data1')
    pyplot.ylabel('data2')

    # show the plot
    pyplot.show()


def compute_gradient_and_plot(data1, data2):
    """Fit a regression line of data2 on data1, print it, and plot it over the scatter.

    Args:
        data1: Independent variable (x values).
            NOTE(review): the line is drawn with ``slope * data1``, so this
            presumably needs to support elementwise arithmetic (e.g. a
            NumPy array) -- confirm against callers.
        data2: Dependent variable (y values).

    Returns:
        None. The slope, intercept and r-value are printed to stdout and the
        figure is displayed with ``pyplot.show()``.
    """
    # calculate linear regression; the p-value and standard error are not used here
    slope, intercept, r_value, _p_value, _std_err = linregress(data1, data2)
    print('slope=%.3f, intercept=%.3f, r_value=%.3f' % (slope, intercept, r_value))

    # plot the data
    pyplot.scatter(data1, data2)
    pyplot.title('Scatter plot of data1 vs data2')
    pyplot.xlabel('data1')
    pyplot.ylabel('data2')

    # overlay the fitted line y = intercept + slope * x in red
    pyplot.plot(data1, intercept + slope * data1, 'r')

    # show the plot
    pyplot.show()
returndef generate_positive_correlated_2_data_sets(seed_initial = 1, length_data = 1000):
seed(seed_initial) # seed random number generator
# prepare data
data1 = 20 * randn(length_data) + 100
data2 = data1 + (10 * randn(length_data) + 50)
bound_upper, bound_lower = get_upper_and_lower_bounds_of_data(data1)
print(f"data 1 bounds…

--

--

Mi'kail Eli'yah