Scripts related to the Master's thesis "Exploring Sonification in Website Navigation on Smartphones"
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
2.5 KiB

3 months ago
  1. import os
  2. import json
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. data_dir = './'
  6. all_data = []
  7. all_tap_logs = []
  8. for filename in os.listdir(data_dir):
  9. if filename.endswith('.json'):
  10. file_path = os.path.join(data_dir, filename)
  11. with open(file_path, encoding='utf-8') as file:
  12. data = json.load(file)
  13. tap_logs = data.get('sensorLog', {}).get('tapLog', [])
  14. for entry in tap_logs:
  15. entry['participant_id'] = filename
  16. all_tap_logs.append(entry)
  17. df = pd.DataFrame(all_tap_logs)
  18. df['timestamp'] = pd.to_datetime(df['timestamp'])
  19. df['path'] = df['url'].apply(lambda x: x.split('://')[-1].split('/', 1)[-1])
  20. # Mapping with tour_operators being mergeable to study page 2. :)
  21. path_to_label = {
  22. "study-page-1": "Study Page 1",
  23. "study-page-2": "Study Page 2",
  24. "study-page-3": "Study Page 3",
  25. "study-page-4": "Study Page 4",
  26. "study-page-5": "Study Page 5",
  27. "study-page-6": "Study Page 6",
  28. "tour_operators": "Study Page 2"
  29. }
  30. df['label'] = df['path'].map(path_to_label)
  31. completion_times = df.groupby(['participant_id', 'label'])['timestamp'].agg(['min', 'max']).reset_index()
  32. completion_times['completion_time'] = (completion_times['max'] - completion_times['min']).dt.total_seconds()
  33. # Filter out technical outliers
  34. comp_query = 'completion_time < 500'
  35. average_completion_times = completion_times.query(comp_query).groupby('label')['completion_time'].mean().reset_index()
  36. c_times_by_page = completion_times.query(comp_query).groupby('label', group_keys=True)[['completion_time']].apply(lambda x: x)
  37. c_times_list = [c_times_by_page.groupby('label').get_group('Study Page 1')['completion_time'], c_times_by_page.groupby('label').get_group('Study Page 2')['completion_time'], c_times_by_page.groupby('label').get_group('Study Page 3')['completion_time'], c_times_by_page.groupby('label').get_group('Study Page 4')['completion_time'], c_times_by_page.groupby('label').get_group('Study Page 5')['completion_time'], c_times_by_page.groupby('label').get_group('Study Page 6')['completion_time']]
  38. # Draw plots
  39. plt.figure(figsize=(10, 6))
  40. #plt.bar(average_completion_times['label'], average_completion_times['completion_time'], color='skyblue')
  41. plt.boxplot(c_times_list)
  42. plt.xlabel('Page')
  43. plt.xticks([1,2,3,4,5,6], ["1 - BudgetBird", "2 - Hotel", "3 - UVV", "4 - Iceland", "5 - Rental", "6 - QuickDeliver"])
  44. plt.ylabel('Average Task Completion Time (s)')
  45. plt.title('Average Task Completion Time by Page')
  46. plt.xticks(rotation=45, ha='right')
  47. plt.tight_layout()
  48. plt.show()