Here is the code for Machine Learning Correlation Data Analysis Between Food and Mood
This script uses a pandas DataFrame to calculate the correlation between two time series.
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
# Widen pandas' console output so the correlation grid printed later is not
# truncated. The old 'display.height' option is deliberately not set: it no
# longer exists in pandas and asking for it raises OptionError.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

print('loading csv file ...')
df = pd.read_csv("data.csv")
# The rest of the script addresses the two series as 'X' and 'Y'. This
# assignment fails unless data.csv has exactly two columns.
df.columns = ['X', 'Y']

# Exclusive upper bounds for the window parameters tried further down:
# n bounds the X look-back (k), m bounds the Y look-ahead (z).
n = 15
m = 15
def get_data(df_pandas, k, z):
    """Pair a trailing average of X with a leading average of Y, row by row.

    For the row at position ``p`` the result holds:

    * ``X`` -- mean of column 'X' over positions ``p - k`` .. ``p`` (inclusive)
    * ``Y`` -- mean of column 'Y' over positions ``p`` .. ``p + z`` (inclusive)

    Windows are clipped at the ends of the frame, so rows near the edges
    average over fewer values.

    Args:
        df_pandas: DataFrame with numeric columns 'X' and 'Y'.
        k: Number of earlier rows included in each X window.
        z: Number of later rows included in each Y window.

    Returns:
        A new DataFrame with columns 'X' and 'Y', one row per input row,
        on a default integer index.
    """
    x_col = df_pandas['X']
    y_col = df_pandas['Y']
    n_rows = df_pandas.shape[0]
    x = np.zeros(n_rows)
    y = np.zeros(n_rows)
    for pos in range(n_rows):
        # Positional slices give the same windows as inclusive label slices
        # on a default 0..n-1 index, but keep working when the frame carries
        # any other index (e.g. after filtering rows).
        x[pos] = x_col.iloc[max(pos - k, 0):pos + 1].mean()
        y[pos] = y_col.iloc[pos:pos + z + 1].mean()
    # Build the result in one step instead of concatenating two frames;
    # this also avoids passing ``axis`` positionally to pd.concat, which
    # current pandas rejects.
    return pd.DataFrame({'X': x, 'Y': y})
# Correlation between windowed X and windowed Y for every pair of window
# parameters: row label = X look-back (i), column label = Y look-ahead (j).
corr_df = pd.DataFrame()  # starts empty; cells are added one at a time
for i, j in ((back, ahead) for back in range(1, n) for ahead in range(1, m)):
    data = get_data(df, i, j)
    corr_df.loc[i, j] = data['X'].corr(other=data['Y'])
print("corr_df", corr_df, sep="\n")
def heatmap(pddf, vmax=1.0, vmin=-1.0, do_mask=False):
    """Draw *pddf* as a colour-coded seaborn heatmap and show the figure.

    Args:
        pddf: 2-D table of values to plot (this script passes a grid of
            correlation coefficients).
        vmax: Value mapped to the top of the colour scale.
        vmin: Value mapped to the bottom of the colour scale. The default
            is -1.0 so that the default scale spans [-1, 1]; the previous
            default of 1.0 equalled ``vmax`` and left no range to colour.
        do_mask: When true, hide the upper triangle of the grid, diagonal
            included.
    """
    print("--------------- CREATE A HEATMAP ---------------")
    mask = None  # seaborn's own default: no cell is hidden
    if do_mask:
        # Boolean array shaped like the data; True marks a hidden cell.
        mask = np.zeros_like(pddf, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
    seaborn.heatmap(pddf, cmap='RdYlGn_r', vmax=vmax, vmin=vmin,
                    mask=mask, linewidths=2.5)
    # Keep row and column tick labels horizontal so they are easy to read.
    plt.yticks(rotation=0)
    plt.xticks(rotation=0)
    plt.show()
# Plot the correlation grid with the colour scale limited to [0, 0.5].
heatmap(corr_df, vmin=0, vmax=0.5)