|
|
import json
import os

import matplotlib.pyplot as plt
import pandas as pd
# Directory that is scanned (non-recursively) for participant JSON exports.
data_dir = './'

# NOTE(review): all_data is never filled in the visible code; it is kept
# because the original script defined it.
all_data = []


def load_tap_logs(directory):
    """Collect tap-log entries from every ``.json`` file in *directory*.

    Each matching file is parsed as JSON and the list stored under
    ``sensorLog -> tapLog`` is read.  Every entry of that list is tagged in
    place with a ``participant_id`` key holding the name of the file it came
    from.  Files whose top-level JSON value is not an object, or whose
    ``sensorLog`` / ``tapLog`` is missing or null, contribute no entries.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        A flat list with the tagged tap-log entries of all files, visited in
        sorted file-name order.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    tap_logs = []
    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(directory, filename)
        with open(file_path, encoding='utf-8') as file:
            data = json.load(file)
        if not isinstance(data, dict):
            # Unrelated JSON (e.g. a top-level list) has no sensorLog to read.
            continue
        # `or` also covers keys that are present but null in the export.
        sensor_log = data.get('sensorLog') or {}
        for entry in sensor_log.get('tapLog') or []:
            entry['participant_id'] = filename
            tap_logs.append(entry)
    return tap_logs


all_tap_logs = load_tap_logs(data_dir)
def _url_to_path(url):
    """Return what follows the first ``/`` after the scheme of *url*.

    The scheme (everything up to ``://``) is dropped, then the text after the
    first remaining ``/`` is returned.  If there is no such ``/`` the
    scheme-less string itself is returned.  Non-string values (for example a
    missing ``url``) yield ``None`` instead of raising.
    """
    if not isinstance(url, str):
        return None
    return url.split('://')[-1].split('/', 1)[-1]


# One row per tap-log entry.
df = pd.DataFrame(all_tap_logs)
# NOTE(review): assumes the 'timestamp' values are parseable by
# pd.to_datetime with default settings -- confirm against the export format.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['path'] = df['url'].apply(_url_to_path)

# Map URL paths to page labels; "tour_operators" is merged into Study Page 2.
# Paths that are not listed here get a missing (NaN) label from Series.map.
path_to_label = {
    "study-page-1": "Study Page 1",
    "study-page-2": "Study Page 2",
    "study-page-3": "Study Page 3",
    "study-page-4": "Study Page 4",
    "study-page-5": "Study Page 5",
    "study-page-6": "Study Page 6",
    "tour_operators": "Study Page 2",
}
df['label'] = df['path'].map(path_to_label)
# For every (participant, page) pair take the first and last tap timestamp;
# the span between them, in seconds, is used as the completion time.
completion_times = (
    df.groupby(['participant_id', 'label'])['timestamp']
    .agg(['min', 'max'])
    .reset_index()
)
completion_times['completion_time'] = (
    completion_times['max'] - completion_times['min']
).dt.total_seconds()

# Filter out technical outliers
comp_query = 'completion_time < 500'
# Apply the filter once and reuse the result for everything below.
filtered_times = completion_times.query(comp_query)

# Mean completion time per page (one row per label).
average_completion_times = (
    filtered_times.groupby('label')['completion_time'].mean().reset_index()
)

# Retained because the original script defined it; the per-page series below
# are taken from filtered_times directly.
c_times_by_page = filtered_times.groupby('label', group_keys=True)[['completion_time']].apply(lambda x: x)

# One Series of completion times per page, in page order 1..6.  A page with
# no rows after filtering yields an empty Series rather than a KeyError, so
# the list always has six entries.
page_labels = [f'Study Page {number}' for number in range(1, 7)]
c_times_list = [
    filtered_times.loc[filtered_times['label'] == page, 'completion_time']
    for page in page_labels
]
# Draw plots
plt.figure(figsize=(10, 6))
# One box per entry of c_times_list, drawn at x positions 1..6.
# (average_completion_times holds the per-page means if a bar chart of
# averages is wanted instead.)
plt.boxplot(c_times_list)
plt.xlabel('Page')
# Tick positions, tick labels and label rotation are set in a single call.
plt.xticks(
    [1, 2, 3, 4, 5, 6],
    ["1 - BudgetBird", "2 - Hotel", "3 - UVV", "4 - Iceland", "5 - Rental", "6 - QuickDeliver"],
    rotation=45,
    ha='right',
)
# NOTE(review): the axis label and title say "Average", but a box plot shows
# the distribution of completion times -- confirm the intended wording.
plt.ylabel('Average Task Completion Time (s)')
plt.title('Average Task Completion Time by Page')
plt.tight_layout()
plt.show()
|