Scripts related to the Master's thesis "Exploring Sonification in Website Navigation on Smartphones"
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

95 lines
3.0 KiB

3 months ago
  1. import os
  2. import json
  3. import pandas as pd
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. import seaborn as sns
  7. data_dir = './'
  8. all_data = []
  9. all_tap_logs = []
  10. for filename in os.listdir(data_dir):
  11. if filename.endswith('.json'):
  12. file_path = os.path.join(data_dir, filename)
  13. with open(file_path, encoding='utf-8') as file:
  14. data = json.load(file)
  15. tap_logs = data.get('sensorLog', {}).get('tapLog', [])
  16. for entry in tap_logs:
  17. entry['participant_id'] = filename
  18. all_tap_logs.append(entry)
  19. df = pd.DataFrame(all_tap_logs)
  20. df['timestamp'] = pd.to_datetime(df['timestamp'])
  21. df['path'] = df['url'].apply(lambda x: x.split('://')[-1].split('/', 1)[-1])
  22. # Mapping with tour_operators being mergeable to study page 2. :)
  23. path_to_label = {
  24. "study-page-1": "Study Page 1",
  25. "study-page-2": "Study Page 2",
  26. "study-page-3": "Study Page 3",
  27. "study-page-4": "Study Page 4",
  28. "study-page-5": "Study Page 5",
  29. "study-page-6": "Study Page 6",
  30. "tour_operators": "Study Page 2"
  31. }
  32. df['label'] = df['path'].map(path_to_label)
  33. def calculate_distances(group):
  34. group = group.sort_values(by='timestamp')
  35. x_diff = group['x'].diff().fillna(0)
  36. y_diff = group['y'].diff().fillna(0)
  37. distances = np.sqrt(x_diff**2 + y_diff**2)
  38. total_distance = distances.sum()
  39. return total_distance
  40. grouped = df.groupby(['participant_id', 'label'])
  41. distance_data = grouped.apply(calculate_distances).reset_index()
  42. distance_data.columns = ['participant_id', 'label', 'total_distance']
  43. def generate_heatmap_data(group):
  44. heatmap_data = group[['x', 'y']].copy()
  45. heatmap_data['radius'] = 40
  46. heatmap_data['value'] = 5
  47. heatmap_data['x'] = heatmap_data['x'].astype(str)
  48. heatmap_data['y'] = heatmap_data['y'].astype(str)
  49. heatmap_data_list = heatmap_data.to_dict(orient='records')
  50. min_value = 1
  51. max_value = 999
  52. return {
  53. "min": min_value,
  54. "max": max_value,
  55. "data": heatmap_data_list
  56. }
  57. for label, group in df.groupby('label'):
  58. heatmap_data = generate_heatmap_data(group)
  59. json_filename = f"{label.replace(' ', '_').lower()}.json"
  60. with open(json_filename, 'w', encoding='utf-8') as json_file:
  61. json.dump(heatmap_data, json_file, indent=4)
  62. print(f"Generated {json_filename} with {len(heatmap_data['data'])} records.")
  63. distance_data.to_csv('distance_data.csv', index=False)
  64. print("Distance data saved to distance_data.csv")
  65. # Filter out technical outliers...
  66. comp_query = 'total_distance < 15000'
  67. # Boxplot drawing
  68. plt.figure(figsize=(12, 6))
  69. sns.boxplot(x='label', y='total_distance', data=distance_data.query(comp_query).apply(lambda x: x))
  70. plt.xticks(rotation=45)
  71. plt.xlabel('Study Page')
  72. plt.xticks([0,1,2,3,4,5], ["1 - BudgetBird", "2 - Hotel", "3 - UVV", "4 - Iceland", "5 - Rental", "6 - QuickDeliver"])
  73. plt.ylabel('Total Distance Traveled (pixels)')
  74. plt.title('Total Distance Traveled per Study Page')
  75. plt.tight_layout()
  76. plt.savefig('distance_boxplot.png')
  77. plt.show()