Source Code for Machine Learning Correlation Data Analysis Between Food and Mood

Here is the code for the Machine Learning Correlation Data Analysis Between Food and Mood.
The script uses a Python pandas DataFrame to calculate the correlation between two time series.

# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn

# Widen pandas' console output so the whole correlation table is printed
# without truncation.
# The former 'display.height' setting is intentionally not set: it is no longer
# a registered pandas option (pd.set_option raises OptionError for it) and
# 'display.max_rows' already controls how many rows are shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# Load the two series to compare. The columns of the file are relabelled
# 'X' and 'Y', which are the names the rest of the script looks up.
print('loading csv file ...')
df = pd.read_csv("data.csv")
df.columns = ['X', 'Y']

# Exclusive upper bounds of the window sizes iterated over below:
# n bounds the first window argument, m the second.
n = m = 15


def get_data(df_pandas, k, z):
    """Return windowed averages of the 'X' and 'Y' columns of *df_pandas*.

    For the row at position ``p`` the result contains:

    * ``'X'``: the mean of ``'X'`` over rows ``p - k .. p`` (the current row
      plus up to ``k`` preceding rows), and
    * ``'Y'``: the mean of ``'Y'`` over rows ``p .. p + z`` (the current row
      plus up to ``z`` following rows).

    Windows are simply shorter near the start/end of the frame, and NaN
    values inside a window are skipped when averaging.

    Args:
        df_pandas: DataFrame with numeric columns ``'X'`` and ``'Y'``.
        k: Number of preceding rows included in each ``'X'`` window (>= 0).
        z: Number of following rows included in each ``'Y'`` window (>= 0).

    Returns:
        A new DataFrame with columns ``['X', 'Y']``, one row per input row,
        indexed 0..len-1 regardless of the input's index.

    Raises:
        ValueError: If ``k`` or ``z`` is negative.
    """
    if k < 0 or z < 0:
        raise ValueError("k and z must be non-negative")

    # Trailing window of k + 1 rows; min_periods=1 keeps the shortened
    # windows at the top of the frame instead of turning them into NaN.
    x_mean = df_pandas['X'].rolling(k + 1, min_periods=1).mean()

    # A forward-looking window is a trailing window over the reversed series;
    # reverse again afterwards to restore the original row order.
    y_reversed = df_pandas['Y'].iloc[::-1]
    y_mean = y_reversed.rolling(z + 1, min_periods=1).mean().iloc[::-1]

    # Plain arrays drop the source index, so the result is always 0..len-1.
    return pd.DataFrame({'X': x_mean.to_numpy(), 'Y': y_mean.to_numpy()})
        
        

# Correlation between the windowed 'X' and 'Y' averages for every pair of
# window sizes: rows are the first window argument (1 .. n-1), columns the
# second (1 .. m-1).
# The table is assembled from plain lists and turned into a DataFrame once,
# rather than enlarging an empty DataFrame one cell at a time.
x_windows = list(range(1, n))
y_windows = list(range(1, m))

corr_rows = []
for i in x_windows:
    corr_row = []
    for j in y_windows:
        data = get_data(df, i, j)
        corr_row.append(data['X'].corr(data['Y']))
    corr_rows.append(corr_row)

corr_df = pd.DataFrame(corr_rows, index=x_windows, columns=y_windows)

print ("corr_df")
print (corr_df)

def heatmap(pddf, vmax=1.0, vmin=1.0, do_mask=False):
    """Draw *pddf* as a seaborn heatmap and show the figure.

    Args:
        pddf: 2-D table of values to plot (the script passes the correlation
            DataFrame).
        vmax: Upper limit of the colour scale, forwarded to seaborn.
        vmin: Lower limit of the colour scale, forwarded to seaborn.
        do_mask: When true, a mask flagging the upper triangle (diagonal
            included) is passed to seaborn along with the data.

    NOTE(review): the default ``vmin`` equals the default ``vmax`` (both 1.0),
    which leaves no colour range when neither is supplied -- confirm whether a
    different default lower bound was intended. The call in this script passes
    both values explicitly.
    """
    print("--------------- CREATE A HEATMAP ---------------")

    # Options shared by the masked and unmasked variants of the plot.
    plot_options = {
        'cmap': 'RdYlGn_r',
        'vmax': vmax,
        'vmin': vmin,
        'linewidths': 2.5,
    }

    if do_mask:
        # Same shape as the data; entries on and above the diagonal are set.
        upper_triangle = np.zeros_like(pddf)
        upper_triangle[np.triu_indices_from(upper_triangle)] = True
        plot_options['mask'] = upper_triangle

    seaborn.heatmap(pddf, **plot_options)

    # Keep the tick labels of both axes horizontal so they are easy to read.
    plt.yticks(rotation=0)
    plt.xticks(rotation=0)
    plt.show()

heatmap (corr_df, vmax=0.5, vmin=0)

Share this: