Here is the code for Machine Learning Correlation Data Analysis Between Food and Mood
This script uses a Python pandas DataFrame to calculate the correlation between two time series.
# -*- coding: utf-8 -*-
"""Lagged correlation between two time series (columns ``X`` and ``Y``).

For every pair of window sizes ``(i, j)`` the script averages ``X`` over the
current row and the ``i`` rows before it, averages ``Y`` over the current row
and the ``j`` rows after it, and correlates the two smoothed series.  The
resulting table is printed and drawn as a heatmap.
"""
import numpy as np
import pandas as pd

# Exclusive upper bounds of the window sizes scanned for X (rows of the
# correlation table) and Y (columns of the table): sizes 1 .. n-1 / 1 .. m-1.
n = 15
m = 15


def _check_lag(lag, name):
    """Return *lag* as a plain ``int``, rejecting negative window sizes."""
    lag = int(lag)
    if lag < 0:
        raise ValueError("%s must be non-negative, got %d" % (name, lag))
    return lag


def _trailing_mean(values, lag):
    """Mean of each element together with up to *lag* elements before it."""
    # min_periods=1 keeps the shortened windows at the start of the series
    # instead of turning them into NaN.
    return values.rolling(window=lag + 1, min_periods=1).mean()


def _leading_mean(values, lag):
    """Mean of each element together with up to *lag* elements after it."""
    # A forward-looking window is a trailing window over the reversed series.
    backwards = values.iloc[::-1].rolling(window=lag + 1, min_periods=1).mean()
    return backwards.iloc[::-1]


def get_data(df_pandas: pd.DataFrame, k: int, z: int) -> pd.DataFrame:
    """Return windowed averages of the ``X`` and ``Y`` columns.

    Args:
        df_pandas: frame with numeric columns ``'X'`` and ``'Y'``; rows are
            taken in their stored order.
        k: number of preceding rows averaged together with the current row
            of ``X`` (window ``[row - k, row]``, shortened at the start).
        z: number of following rows averaged together with the current row
            of ``Y`` (window ``[row, row + z]``, shortened at the end).

    Returns:
        A new frame with columns ``'X'`` and ``'Y'``, a fresh 0-based index
        and one row per input row.  Missing values inside a window are
        skipped; a window with no valid value yields NaN.

    Raises:
        ValueError: if ``k`` or ``z`` is negative.
    """
    k = _check_lag(k, "k")
    z = _check_lag(z, "z")
    x_mean = _trailing_mean(df_pandas['X'].reset_index(drop=True), k)
    y_mean = _leading_mean(df_pandas['Y'].reset_index(drop=True), z)
    return pd.DataFrame({'X': x_mean.to_numpy(dtype=float),
                         'Y': y_mean.to_numpy(dtype=float)})


def correlation_table(df_pandas: pd.DataFrame,
                      x_stop: int = n, y_stop: int = m) -> pd.DataFrame:
    """Correlate smoothed ``X`` against smoothed ``Y`` for a grid of windows.

    Args:
        df_pandas: frame with numeric columns ``'X'`` and ``'Y'``.
        x_stop: exclusive upper bound of the ``X`` window sizes (``1 .. x_stop-1``).
        y_stop: exclusive upper bound of the ``Y`` window sizes (``1 .. y_stop-1``).

    Returns:
        Frame indexed by the ``X`` window size with one column per ``Y``
        window size; each cell holds the correlation computed by
        ``Series.corr`` between the two windowed averages.
    """
    x_lags = range(1, x_stop)
    y_lags = range(1, y_stop)
    x_values = df_pandas['X'].reset_index(drop=True)
    y_values = df_pandas['Y'].reset_index(drop=True)

    # Each smoothed series depends on one lag only, so compute it once
    # instead of once per (i, j) pair.
    x_means = [_trailing_mean(x_values, lag) for lag in x_lags]
    y_means = [_leading_mean(y_values, lag) for lag in y_lags]

    table = np.full((len(x_lags), len(y_lags)), np.nan)
    for row, x_mean in enumerate(x_means):
        for col, y_mean in enumerate(y_means):
            table[row, col] = x_mean.corr(y_mean)
    return pd.DataFrame(table, index=x_lags, columns=y_lags)


def heatmap(pddf, vmax=1.0, vmin=1.0, do_mask=False):
    """Draw *pddf* as a seaborn heatmap and show the figure.

    Args:
        pddf: two-dimensional table of values to plot.
        vmax: value mapped to the top of the colour scale.
        vmin: value mapped to the bottom of the colour scale.
        do_mask: when true, hide the upper triangle (diagonal included).
    """
    # NOTE(review): the vmin default equals vmax (1.0), which leaves no colour
    # range; -1.0 was probably intended.  Kept unchanged for compatibility.
    # Plotting libraries are imported here so the numeric helpers above can
    # be imported without matplotlib/seaborn being installed.
    import matplotlib.pyplot as plt
    import seaborn

    print("--------------- CREATE A HEATMAP ---------------")
    mask = None
    if do_mask:
        mask = np.zeros(pddf.shape, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
    seaborn.heatmap(pddf, cmap='RdYlGn_r', vmax=vmax, vmin=vmin,
                    mask=mask, linewidths=2.5)
    # Keep the row and column labels horizontal so they are easier to read.
    plt.yticks(rotation=0)
    plt.xticks(rotation=0)
    plt.show()


def main():
    """Load ``data.csv``, print the correlation table and plot it."""
    # 'display.height' is not set any more: it is not a valid option in
    # current pandas releases and makes set_option raise.
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    print('loading csv file ...')
    df = pd.read_csv("data.csv")
    df.columns = ['X', 'Y']

    corr_df = correlation_table(df, n, m)
    print("corr_df")
    print(corr_df)

    heatmap(corr_df, vmax=0.5, vmin=0)


if __name__ == "__main__":
    main()