# The Question Of Order And Chaos

## Data, How Are You Related?

12 min read · Apr 13

--

Life goes in cycles, it seems to mess up, but it ends perfectly well-rounded. — Re: 4 circles, each delineated with squares, 1 encircling the other, 2011–10.27

To detect whether two data sets are correlated, we can use a few summary statistics as the decision logic.

# This illustrates classifying two data sets as positively correlated, negatively correlated, or uncorrelated.

from numpy import mean

from numpy import std

from numpy.random import randn

from numpy.random import seed

from matplotlib import pyplot

from scipy.stats import pearsonr # import the correlation function

from scipy.stats import linregressdefcompute_statistics_correlation_and_plot(data1, data2):

# summarize

print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))

print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

# calculate Pearson's correlation

corr, _ = pearsonr(data1, data2)

print('Pearsons correlation: %.3f' % corr)

# plot the data

pyplot.scatter(data1, data2)

pyplot.title('Scatter plot of data1 vs data2')

pyplot.xlabel('data1')

pyplot.ylabel('data2')

# show the plot

pyplot.show() returndefcompute_gradient_and_plot(data1, data2):

# calculate linear regression

slope, intercept, r_value, p_value, std_err = linregress(data1, data2)

print('slope=%.3f, intercept=%.3f, r_value=%.3f' % (slope, intercept, r_value))

# plot the data and regression line

pyplot.scatter(data1, data2)

pyplot.title('Scatter plot of data1 vs data2')

pyplot.xlabel('data1')

pyplot.ylabel('data2')

pyplot.plot(data1, intercept + slope * data1, 'r')

# show the plot

pyplot.show() returndefgenerate_positive_correlated_2_data_sets(seed_initial = 1, length_data = 1000):

seed(seed_initial) # seed random number generator

# prepare data

data1 = 20 * randn(length_data) + 100

data2 = data1 + (10 * randn(length_data) + 50)bound_upper, bound_lower = get_upper_and_lower_bounds_of_data(data1)

print(f"data 1 bounds…