Investment Portfolio Diversification By Using Data Analysis in Python

Correlation Coefficient

Project Objective

Project Stages

Stage 1: Gathering Data

def update_dataset():
    """Download daily candles for every company and save each as a CSV file.

    For each symbol in the module-level ``companies`` list, daily ('D')
    candles between ``start_date`` and ``end_date`` are requested from the
    Finnhub API and written to ``<symbol>.csv`` in the current directory.
    Symbols for which Finnhub reports no data are reported on stdout and
    skipped.

    NOTE(review): ``companies``, ``start_date`` and ``end_date`` are read
    from the enclosing module; they are not defined in this snippet.
    """
    import finnhub
    import pandas as pd

    # Setup client for Finnhub API
    finnhub_client = finnhub.Client(api_key="###############")

    for company in companies:
        # Stock candles
        res = finnhub_client.stock_candles(company, 'D', start_date, end_date)

        # Finnhub sets the status field ``s`` to 'ok' or 'no_data'. A
        # 'no_data' payload holds only that scalar and cannot be turned
        # into a DataFrame, so report it and move on.
        if res.get('s') != 'ok':
            print(company, 'no data found ...')
            continue

        # Convert to Pandas Dataframe
        response = pd.DataFrame(res)

        # Write the CSV file out of data frame
        response.to_csv(f"{company}.csv")

Stage 2: Cleaning and Processing Data

class DeltaObject:
    """Price movement between two consecutive records of one company.

    Attributes:
        change: Absolute difference between the two closing prices.
        percent: ``change`` relative to the earlier closing price.
        dates: Pair ``(earlier_date, later_date)`` the change spans.
    """

    def __init__(self, delta_change, percent_change, dates):
        self.change = delta_change
        self.percent = percent_change
        self.dates = dates

    def __repr__(self):
        return f'Change:{self.change},Percent: {self.percent}, Dates: {self.dates}'
def refine_companies_records():
    """Build the day-to-day price changes for every company with a CSV file.

    Returns:
        dict mapping each company symbol to a list of ``DeltaObject``
        instances, one per pair of consecutive records.

    NOTE(review): ``companies``, ``files`` and ``get_company_data`` come from
    the enclosing module and are not visible in this snippet.
    NOTE(review): this assumes ``get_company_data`` yields raw CSV lines as
    written by ``DataFrame.to_csv`` on a Finnhub candle response, i.e. a
    header ``,c,h,l,o,s,t,v`` followed by data rows -- confirm.
    """
    companies_records = {}

    # get company data for each company that exists in file directory
    for company in companies:
        if (company + '.csv') not in files:
            continue
        company_data = get_company_data(company)

        # Column positions of close price ('c') and timestamp ('t'); the
        # defaults match the assumed header and are refined if a header
        # row is actually present.
        close_idx, time_idx = 1, 6

        close_price_list = []
        for line in company_data:
            fields = line.strip('\n').split(',')
            if 'c' in fields and 't' in fields:
                # Header row: take the real column positions from it.
                close_idx, time_idx = fields.index('c'), fields.index('t')
                continue
            if len(fields) <= max(close_idx, time_idx):
                continue  # blank or truncated line
            # Each entry is [close_price, date], the layout the loop below reads.
            close_price_list.append([float(fields[close_idx]), fields[time_idx]])

        change_list = []
        for previous, current in zip(close_price_list, close_price_list[1:]):
            delta_change = current[0] - previous[0]
            # A zero previous close would divide by zero; report 0 instead.
            percent_change = delta_change / previous[0] if previous[0] else 0.0
            dates = (previous[1], current[1])
            change_list.append(DeltaObject(delta_change, percent_change, dates))

        companies_records[company] = change_list

    return companies_records

Stage 3: Converting data to Pandas DataFrame

company_data = {'A':[DeltaObject0, DeltaObject1, ...], 'B': [DeltaObject0, DeltaObject1, ...], ...}

# Illustration: compare the first change of the selected company with the
# first change of company A and derive the colour of the data point.
selected_company = 'C'
date_0_c = company_data[selected_company][0].dates
change_0_c = company_data[selected_company][0].change
percent_0_c = company_data[selected_company][0].percent

# data for company A
date_0_A = company_data['A'][0].dates
change_0_A = company_data['A'][0].change
percent_0_A = company_data['A'][0].percent

# The product is positive only when both prices moved in the same direction.
change = change_0_c * change_0_A  # for determining the color
percent = percent_0_A / percent_0_c  # for determining the color opacity

if change > 0:
    color = (0, 1, 0, percent)  # Green color with alpha = percent
else:
    color = (1, 0, 0, percent)  # Red color with alpha = percent
def get_compared_datapoints(company_data):
    """Compare one company's changes with every company's, date by date.

    Args:
        company_data: Iterable of ``DeltaObject`` for the selected company.

    Returns:
        Nested dict ``{dates: {symbol: (r, g, b, alpha), ...}, ...}``.
        The colour is green when both prices moved in the same direction
        and red otherwise. ``alpha`` is the other company's percent change
        relative to the selected company's, capped at 1.

    NOTE(review): reads the module-level ``companies_records`` mapping
    (symbol -> list of ``DeltaObject``), which is not defined in this snippet.
    """
    results = {}

    for record in company_data:
        date = record.dates
        change = record.change
        percent = record.percent

        # One entry per company that has a record for the same date pair.
        merged_dict = {}

        for symbol in companies_records:
            company_to_compare = companies_records[symbol]

            for k in company_to_compare:
                if k.dates != date:
                    continue

                # set color of the comparison result
                if (change * k.change) <= 0:
                    same_change = [1, 0, 0]  # False: show in Red color
                else:
                    same_change = [0, 1, 0]  # True: show in Green color

                # Set Color Opacity
                if percent:
                    relational_opacity = round(abs(k.percent / percent), 2)
                    if relational_opacity >= 1:
                        relational_opacity = 1
                else:
                    # No movement in the selected company: the ratio is
                    # undefined, so draw the point fully transparent.
                    relational_opacity = 0

                same_change.append(relational_opacity)
                merged_dict[symbol] = tuple(same_change)

        results[date] = merged_dict

    # returns a nested dictionary {time: {company_1: color_tuple, ..., company_n: color_tuple}}
    return results

Stage 4: Visualizing Data

def plot_graph():
    """Draw a stacked bar per company, one coloured segment per date pair.

    Prompts for how many companies to show, then stacks unit-height bar
    segments for each of them, coloured with the matching cell of
    ``data_frame``.

    NOTE(review): ``companies``, ``data_frame``, ``plt``, ``company``,
    ``start_date`` and ``till_date`` are read from the enclosing module and
    are not defined in this snippet.
    NOTE(review): presumably each ``data_frame`` cell holds an RGBA tuple,
    with rows as companies and columns as date pairs -- confirm.
    """
    selection = int(input("Number of companies to compare: "))

    x_companies = companies[:selection]
    y_date = 1  # every segment is one unit tall

    colors_data = data_frame.values[:selection]

    # Plot the bar chart
    for j in range(colors_data.shape[1]):
        # zip() stops at the shorter sequence, so asking for more companies
        # than exist cannot index past the end.
        for name, row in zip(x_companies, colors_data):
            plt.bar(name, y_date, bottom=j, color=row[j])

    # Set plot specifications
    plt.title(f"Correlation with {company}'s stock price change")
    plt.xticks(x_companies, rotation=90)
    plt.ylabel(f"From {start_date} to {till_date}")

Interpreting the Graph




Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store