# Path of the matplotlib style sheet handed to plt.style.context() for the plots in this file.
TIME_SERIES_STYLE = 'src/time_series.mplstyle'
import pandas as pd
import matplotlib.pyplot as plt
# Load every <Record> element of the Apple Health export into a DataFrame.
# The path is given to pandas directly so that pandas opens *and closes* the
# file itself; the previous bare open() call never released its handle.
df = pd.read_xml(
    'data/apple_health_export/dati esportati.xml',
    xpath="//Record",
)

# Keep only the rows from index label 3 onwards.
# NOTE(review): the original author marked this slice "check this"; why the
# first rows are skipped is not visible from this file -- confirm.
df = df.loc[3:]  # check this
df = df.drop('MetadataEntry', axis=1)

# Convert to datetime
cols = ['creationDate', 'startDate', 'endDate']
for col in cols:
    df[col] = pd.to_datetime(df[col])

# Set as index a middle date between startDate and endDate
df['middleDate'] = df.startDate + (df.endDate - df.startDate) / 2
df = df.set_index('middleDate')

# Quick look at three random records (DataFrame.to_markdown needs `tabulate`).
print(df.sample(3).to_markdown())

# Notebook-style inspection of the distinct record types; when the file is run
# as a plain script the result of this expression is simply discarded.
df.type.unique()
# Output of the expression above (the distinct record types found in the export):
# array(['HKQuantityTypeIdentifierStepCount',
#        'HKQuantityTypeIdentifierDistanceWalkingRunning',
#        'HKQuantityTypeIdentifierFlightsClimbed',
#        'HKQuantityTypeIdentifierHeadphoneAudioExposure',
#        'HKQuantityTypeIdentifierWalkingDoubleSupportPercentage',
#        'HKQuantityTypeIdentifierWalkingSpeed',
#        'HKQuantityTypeIdentifierWalkingStepLength',
#        'HKQuantityTypeIdentifierWalkingAsymmetryPercentage'], dtype=object)
# One frame per record type of interest: steps, walked distance, climbed floors.
step_count = df.loc[df['type'].eq('HKQuantityTypeIdentifierStepCount')]
walked_km = df.loc[df['type'].eq('HKQuantityTypeIdentifierDistanceWalkingRunning')]
climbed_floors = df.loc[df['type'].eq('HKQuantityTypeIdentifierFlightsClimbed')]
# Only data from 2019-10 onwards is taken into account, because earlier data are noisy.
# Daily totals of steps, kilometres and floors: three stacked bar charts that
# share the same x (date) axis.
with plt.style.context(TIME_SERIES_STYLE):
    date_start = '2019-10'
    date_end = None  # None leaves the .loc slice open-ended on the right
    collection = [
        (step_count, 'n. of steps'),
        (walked_km, 'n. of km walked'),
        (climbed_floors, 'n. of floors climbed'),
    ]
    fig, axs = plt.subplots(3, 1, figsize=(20, 12), sharex=True)
    axs[0].set_title('Daily information about steps, kilometres and floors', fontsize=17)
    for ax, (records, ylabel) in zip(axs, collection):
        # Select 'value' *before* aggregating.  Summing the whole frame also
        # tries to add up the datetime and string columns; pandas >= 2.0 no
        # longer drops such columns silently, so the frame-wide sum fails there
        # (and is wasted work on older versions).
        d = records['value'].resample('D').sum().loc[date_start:date_end]
        ax.bar(x=d.index, height=d.values)
        ax.grid(axis='x')
        ax.set_ylabel(ylabel, fontsize=12)
    #plt.savefig('img/output/daily_info.png', transparent=True)
# Average quantity (e.g. km walked, floors climbed) for each day of the week, hour by hour
def mean_quantity_eachday_hbh(df, day=None, date_start='2019-10', date_end='2020-03-15'):
    """Return hour-by-hour mean and standard deviation for each day of the week.

    The records are summed into hourly bins, restricted to the window
    ``date_start``..``date_end`` and then grouped by the label
    ``'DayOfTheWeek Hour:Minute'`` (``strftime('%A %H:%M')``), so that the same
    hour of the same weekday is pooled across all the weeks in the window.

    Parameters
    ----------
    df : pandas.DataFrame
        Records indexed by a DatetimeIndex.  A numeric ``'value'`` column is
        required when ``day`` is given.
    day : str, optional
        Weekday name as produced by ``strftime('%A')`` (e.g. ``'Monday'``).
        Rows are selected with ``str.contains(day)``.
    date_start, date_end : optional
        Bounds of the ``.loc`` slice applied to the hourly data.  The defaults
        are the window that used to be hard-coded in this function.

    Returns
    -------
    tuple
        If ``day`` is truthy: ``(m_d, s_d)`` as NumPy arrays, where ``m_d`` is
        the *cumulative* sum of the hourly means of ``'value'`` over that day
        and ``s_d`` the (non-cumulated) hourly standard deviations.
        Otherwise: ``(m, s)``, the full mean and standard-deviation DataFrames
        indexed by the ``'DayOfTheWeek Hour:Minute'`` label.
    """
    # Hourly resampling.  'h' replaces the 'H' alias deprecated in pandas 2.2.
    # numeric_only=True keeps the sum away from datetime/string columns, which
    # pandas >= 2.0 no longer drops on its own (the aggregation fails there
    # otherwise); on older versions this was already the effective behaviour.
    data = df.resample('h').sum(numeric_only=True).loc[date_start:date_end]

    # Group by '%A %H:%M', i.e. 'DayOfTheWeek Hour:Minute', and compute mean
    # and std.  The key is built once and shared by both aggregations.
    slot = data.index.strftime('%A %H:%M')
    grouped = data.groupby(slot)
    m = grouped.mean()
    s = grouped.std()

    if not day:
        return m, s

    m_d = m.loc[m.index.str.contains(day), 'value'].cumsum().values
    s_d = s.loc[s.index.str.contains(day), 'value'].values
    return m_d, s_d
def plot_mean_quantity_eachday_hbh(
    df,
    list_of_days,
    fig,
    ax,
    ylabel='n. of floors climbed',
    title='Average number of flights of stairs climbed \n(data from 2019-10 to 2020-03)',
):
    """Plot, for each requested weekday, the cumulative hourly mean of a quantity.

    For every day in ``list_of_days`` the arrays returned by
    ``mean_quantity_eachday_hbh(df, day)`` are drawn on ``ax``: the mean as a
    line and mean +/- std as a translucent band.

    Parameters
    ----------
    df : pandas.DataFrame
        Records forwarded unchanged to ``mean_quantity_eachday_hbh``.
    list_of_days : iterable of str
        Weekday names; each becomes one labelled line.
    fig : matplotlib figure
        Accepted for interface compatibility; not used by this function.
    ax : matplotlib axes
        Axes that receive the lines, bands, ticks, labels, legend and grid.
    ylabel, title : str, optional
        Axis label and title.  The defaults are the texts that used to be
        hard-coded (floors climbed), so existing calls render as before.
    """
    # The curves are drawn against the 24 hours of a day, so each per-day
    # array coming back from mean_quantity_eachday_hbh must hold 24 values.
    hours = range(0, 24)
    for day in list_of_days:
        m_d, s_d = mean_quantity_eachday_hbh(df, day)
        ax.plot(hours, m_d, label=day)
        ax.fill_between(hours, m_d - s_d, m_d + s_d, alpha=.1)

    # One tick every three hours: '00:00', '03:00', ..., '21:00'.
    tick_hours = range(0, 24, 3)
    ax.set_xticks(tick_hours)
    ax.set_xticklabels(labels=[f'{hour:02d}:00' for hour in tick_hours])

    ax.set_xlabel('Time', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.legend()
    ax.set_title(title, fontsize=15)
    ax.grid(axis='x')
# Hour-by-hour average of the floors climbed, one line per day of the week,
# drawn on a freshly created figure under the time-series style.
with plt.style.context(TIME_SERIES_STYLE):
    weekdays = [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
    ]
    hourly_fig, hourly_ax = plt.subplots()
    plot_mean_quantity_eachday_hbh(climbed_floors, weekdays, hourly_fig, hourly_ax)
    #plt.savefig('img/output/hourly_floors_climbed.png', transparent=True)