Scripts related to the Master's thesis "Exploring Sonification in Website Navigation on Smartphones"
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

96 lines
3.0 KiB

import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Directory scanned for the JSON exports (one file per participant).
data_dir = './'
all_data = []
all_tap_logs = []

# Collect every tap entry from every JSON export, tagging each entry with the
# name of the file it came from so taps can later be grouped per participant.
for filename in os.listdir(data_dir):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(data_dir, filename)
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)
    # Files without a sensorLog/tapLog section simply contribute no taps.
    tap_logs = data.get('sensorLog', {}).get('tapLog', [])
    for entry in tap_logs:
        entry['participant_id'] = filename
    all_tap_logs.extend(tap_logs)

df = pd.DataFrame(all_tap_logs)
df['timestamp'] = pd.to_datetime(df['timestamp'])


def _url_to_path(url):
    """Return the part of *url* after the first '/' that follows the scheme.

    When nothing follows the scheme-less remainder's host (no '/'), the
    remainder itself is returned unchanged.
    """
    without_scheme = url.split('://')[-1]
    return without_scheme.split('/', 1)[-1]


df['path'] = df['url'].apply(_url_to_path)

# Path -> display label. "tour_operators" is deliberately folded into
# Study Page 2. Paths missing from this mapping end up with a NaN label.
path_to_label = {
    "study-page-1": "Study Page 1",
    "study-page-2": "Study Page 2",
    "study-page-3": "Study Page 3",
    "study-page-4": "Study Page 4",
    "study-page-5": "Study Page 5",
    "study-page-6": "Study Page 6",
    "tour_operators": "Study Page 2",
}
df['label'] = df['path'].map(path_to_label)
def calculate_distances(group):
    """Return the total distance travelled between consecutive taps.

    The rows of *group* are ordered by ``timestamp`` and the Euclidean
    distance between each tap and its predecessor is summed.

    Args:
        group: DataFrame with ``timestamp``, ``x`` and ``y`` columns.

    Returns:
        The summed step distances; ``0.0`` when the group holds a single tap.
    """
    # Stable sort: taps that share a timestamp keep the order in which they
    # were logged, so the result does not depend on the sort implementation.
    ordered = group.sort_values(by='timestamp', kind='mergesort')
    # The first tap has no predecessor; diff() yields NaN there, which is
    # counted as zero movement.
    step_x = ordered['x'].diff().fillna(0)
    step_y = ordered['y'].diff().fillna(0)
    return np.hypot(step_x, step_y).sum()
# One total-distance value per (participant, study page) combination.
grouped = df.groupby(['participant_id', 'label'])
per_group_totals = grouped.apply(calculate_distances)
# Turn the group keys back into regular columns and name the value column.
distance_data = per_group_totals.reset_index().set_axis(
    ['participant_id', 'label', 'total_distance'],
    axis='columns',
)
def generate_heatmap_data(group, *, radius=40, value=5, min_value=1, max_value=999):
    """Build a JSON-serialisable heatmap payload from the taps in *group*.

    Args:
        group: DataFrame with ``x`` and ``y`` columns; it is not modified.
        radius: Radius stored with every point.
        value: Intensity value stored with every point.
        min_value: Value written to the payload's ``"min"`` field.
        max_value: Value written to the payload's ``"max"`` field.

    Returns:
        A dict with the keys ``"min"``, ``"max"`` and ``"data"``, where
        ``"data"`` is a list of ``{"x", "y", "radius", "value"}`` records and
        the coordinates are stored as strings.
    """
    # astype() returns a new frame, so the caller's data stays untouched.
    points = group[['x', 'y']].astype(str)
    points['radius'] = radius
    points['value'] = value
    return {
        "min": min_value,
        "max": max_value,
        "data": points.to_dict(orient='records'),
    }
# Write one heatmap JSON file per study page; the file name is the page
# label in lower case with spaces replaced by underscores.
for label, group in df.groupby('label'):
    payload = generate_heatmap_data(group)
    file_stem = label.replace(' ', '_').lower()
    json_filename = f"{file_stem}.json"
    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(payload, json_file, indent=4)
    record_count = len(payload['data'])
    print(f"Generated {json_filename} with {record_count} records.")

# Persist the per-participant, per-page distances alongside the heatmaps.
distance_data.to_csv('distance_data.csv', index=False)
print("Distance data saved to distance_data.csv")
# Filter out technical outliers...
comp_query = 'total_distance < 15000'

# Study-page label -> tick text. The dict order is also the left-to-right
# order of the boxes, so each tick text is guaranteed to sit under the box of
# the page it names, even when a page has no rows left after filtering.
page_tick_labels = {
    "Study Page 1": "1 - BudgetBird",
    "Study Page 2": "2 - Hotel",
    "Study Page 3": "3 - UVV",
    "Study Page 4": "4 - Iceland",
    "Study Page 5": "5 - Rental",
    "Study Page 6": "6 - QuickDeliver",
}
plot_data = distance_data.query(comp_query)

# Boxplot drawing
plt.figure(figsize=(12, 6))
sns.boxplot(
    x='label',
    y='total_distance',
    data=plot_data,
    order=list(page_tick_labels),
)
plt.xlabel('Study Page')
# Set positions, texts and rotation in one call so the rotation is applied to
# the final tick labels.
plt.xticks(
    list(range(len(page_tick_labels))),
    list(page_tick_labels.values()),
    rotation=45,
)
plt.ylabel('Total Distance Traveled (pixels)')
plt.title('Total Distance Traveled per Study Page')
plt.tight_layout()
plt.savefig('distance_boxplot.png')
plt.show()