# Scripts related to the Master's thesis "Exploring Sonification in Website Navigation on Smartphones".
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

60 lines
2.5 KiB

import os
import json
import pandas as pd
import matplotlib.pyplot as plt
# Directory scanned for the per-participant JSON exports.
data_dir = './'
# Not referenced anywhere in the visible script; retained so the module's
# top-level names stay unchanged.
all_data = []
# One dict per tap event, collected across every JSON file in data_dir.
all_tap_logs = []

for filename in os.listdir(data_dir):
    # Only JSON exports are of interest; skip everything else early.
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(data_dir, filename)
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)
    # Tolerate exports where 'sensorLog' or 'tapLog' is missing or null
    # instead of failing on None.
    tap_logs = (data.get('sensorLog') or {}).get('tapLog') or []
    for entry in tap_logs:
        # The file name (including the '.json' suffix) doubles as the
        # participant identifier.
        entry['participant_id'] = filename
        all_tap_logs.append(entry)
# Stop with a readable message rather than the opaque KeyError that
# df['timestamp'] raises when no tap entries were collected.
if not all_tap_logs:
    raise SystemExit(
        f'No tap log entries found in the JSON files under {data_dir!r}.'
    )

df = pd.DataFrame(all_tap_logs)
# NOTE(review): relies on pd.to_datetime inferring the timestamp format;
# numeric epoch values would need an explicit unit= -- confirm against the
# exported data.
df['timestamp'] = pd.to_datetime(df['timestamp'])


def _url_to_path(url):
    """Return the part of *url* after the host.

    Everything up to and including the last '://' is dropped, then
    everything up to and including the first remaining '/'. When no '/'
    follows, the remaining text is returned unchanged.
    """
    without_scheme = url.split('://')[-1]
    return without_scheme.split('/', 1)[-1]


df['path'] = df['url'].apply(_url_to_path)

# Page paths and their display labels; 'tour_operators' is merged into
# Study Page 2.
path_to_label = {
    "study-page-1": "Study Page 1",
    "study-page-2": "Study Page 2",
    "study-page-3": "Study Page 3",
    "study-page-4": "Study Page 4",
    "study-page-5": "Study Page 5",
    "study-page-6": "Study Page 6",
    "tour_operators": "Study Page 2",
}
# Paths without an entry in the mapping get a missing (NaN) label.
df['label'] = df['path'].map(path_to_label)
# First and last tap per participant and page; the span between the two is
# used as the completion time for that page, in seconds.
completion_times = (
    df.groupby(['participant_id', 'label'])['timestamp']
    .agg(['min', 'max'])
    .reset_index()
)
completion_times['completion_time'] = (
    completion_times['max'] - completion_times['min']
).dt.total_seconds()

# Filter out technical outliers
comp_query = 'completion_time < 500'
# Evaluate the filter once and reuse the result for both aggregations.
filtered_times = completion_times.query(comp_query)

average_completion_times = (
    filtered_times.groupby('label')['completion_time'].mean().reset_index()
)
# NOTE(review): the identity apply appears to exist only to carry 'label'
# along (as an index level on recent pandas) for the per-page grouping
# below -- confirm before simplifying.
c_times_by_page = filtered_times.groupby('label', group_keys=True)[['completion_time']].apply(lambda x: x)

# One series of completion times per study page, in page order. The grouping
# is built once; a page with no remaining rows yields an empty series so the
# list always has six entries instead of get_group raising KeyError.
page_labels = [f'Study Page {number}' for number in range(1, 7)]
page_groups = c_times_by_page.groupby('label')
c_times_list = [
    page_groups.get_group(label)['completion_time']
    if label in page_groups.groups
    else pd.Series(dtype='float64', name='completion_time')
    for label in page_labels
]
# Draw one box per study page from the per-page completion-time series.
plt.figure(figsize=(10, 6))
plt.boxplot(c_times_list)
plt.xlabel('Page')
# boxplot places the boxes at positions 1..N, so the tick positions are
# derived from the number of labels; rotation and alignment are applied in
# the same call.
page_tick_labels = [
    "1 - BudgetBird",
    "2 - Hotel",
    "3 - UVV",
    "4 - Iceland",
    "5 - Rental",
    "6 - QuickDeliver",
]
plt.xticks(
    list(range(1, len(page_tick_labels) + 1)),
    page_tick_labels,
    rotation=45,
    ha='right',
)
# A boxplot shows the distribution (median and quartiles), not the mean, so
# the labels no longer say "Average".
plt.ylabel('Task Completion Time (s)')
plt.title('Task Completion Time by Page')
plt.tight_layout()
plt.show()