You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
3.0 KiB
96 lines
3.0 KiB
import os
|
|
import json
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
|
|
# Directory scanned (non-recursively) for participant JSON exports.
data_dir = './'

# `all_data` is not referenced anywhere else in the visible script; it is kept
# so the module still exposes the same names.
all_data = []
all_tap_logs = []

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the resulting DataFrame reproducible between runs and machines.
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(data_dir, filename)
    with open(file_path, encoding='utf-8') as file:
        data = json.load(file)
    # Any other *.json file in the directory is read as well. Skip documents
    # whose top level is not an object instead of crashing on `.get`.
    if not isinstance(data, dict):
        continue
    # The `or` fallbacks also cover keys that are present but null.
    tap_logs = (data.get('sensorLog') or {}).get('tapLog') or []
    for entry in tap_logs:
        # The file name (including its extension) identifies the participant.
        entry['participant_id'] = filename
        all_tap_logs.append(entry)

# Fail with a clear message rather than a KeyError on the first column access.
if not all_tap_logs:
    raise SystemExit(f"No tap log entries found in JSON files under {data_dir!r}")

df = pd.DataFrame(all_tap_logs)

df['timestamp'] = pd.to_datetime(df['timestamp'])

# Drop the scheme, then everything up to and including the first '/', leaving
# the part after the host. A URL without any '/' after the host keeps the host
# itself. The vectorised string methods propagate missing URLs as NaN instead
# of raising.
df['path'] = df['url'].str.split('://').str[-1].str.split('/', n=1).str[-1]

# Page path -> display label. "tour_operators" is deliberately mapped to
# "Study Page 2" so its taps are merged into that page.
path_to_label = {
    "study-page-1": "Study Page 1",
    "study-page-2": "Study Page 2",
    "study-page-3": "Study Page 3",
    "study-page-4": "Study Page 4",
    "study-page-5": "Study Page 5",
    "study-page-6": "Study Page 6",
    "tour_operators": "Study Page 2",
}

# Paths that are not keys of the mapping end up with a NaN label.
df['label'] = df['path'].map(path_to_label)
|
|
|
|
def calculate_distances(group):
    """Return the total path length traced by a group of tap events.

    The rows are ordered by ``timestamp`` and the Euclidean distances between
    consecutive ``(x, y)`` positions are summed.

    Args:
        group: DataFrame with ``timestamp``, ``x`` and ``y`` columns.

    Returns:
        The summed distance, in the same units as ``x`` and ``y``. The result
        is zero when fewer than two rows carry coordinates.
    """
    # Rows without a position cannot start or end a segment. Dropping them
    # first measures the jump between the surrounding taps; otherwise both
    # neighbouring diffs become NaN and are silently counted as zero.
    located = group.dropna(subset=['x', 'y'])

    # A stable sort keeps taps that share a timestamp in their logged order,
    # so the result does not depend on the sort algorithm's tie handling.
    ordered = located.sort_values(by='timestamp', kind='stable')

    # The first row has no predecessor; its NaN diff counts as zero movement.
    x_diff = ordered['x'].diff().fillna(0)
    y_diff = ordered['y'].diff().fillna(0)

    return np.sqrt(x_diff**2 + y_diff**2).sum()
|
|
|
|
# One group per (participant, study page) pair.
grouped = df.groupby(['participant_id', 'label'])

# Collapse every group to its total tap-to-tap distance and flatten the result
# into a three-column table.
distance_data = (
    grouped.apply(calculate_distances)
    .reset_index()
    .set_axis(['participant_id', 'label', 'total_distance'], axis=1)
)
|
|
|
|
def generate_heatmap_data(group, *, radius=40, value=5, min_value=1, max_value=999):
    """Build a JSON-serialisable heatmap payload from tap positions.

    Args:
        group: DataFrame with ``x`` and ``y`` columns; it is not modified.
        radius: Value stored under ``"radius"`` for every point.
        value: Value stored under ``"value"`` for every point.
        min_value: Value stored under the top-level ``"min"`` key.
        max_value: Value stored under the top-level ``"max"`` key.

    Returns:
        A dict ``{"min": ..., "max": ..., "data": [...]}`` where ``data`` holds
        one ``{"x", "y", "radius", "value"}`` record per input row, with ``x``
        and ``y`` converted to strings.
    """
    # NOTE(review): the string coordinates and the min/max/data layout look
    # like a heatmap-library input format -- confirm which consumer reads it.
    points = group[['x', 'y']].astype(str).assign(radius=radius, value=value)

    return {
        "min": min_value,
        "max": max_value,
        "data": points.to_dict(orient='records'),
    }
|
|
|
|
# Write one heatmap JSON file per study page into the working directory.
for label, group in df.groupby('label'):
    heatmap_data = generate_heatmap_data(group)
    # "Study Page 1" -> "study_page_1.json"
    json_filename = f"{label.replace(' ', '_').lower()}.json"
    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(heatmap_data, json_file, indent=4)

    print(f"Generated {json_filename} with {len(heatmap_data['data'])} records.")

distance_data.to_csv('distance_data.csv', index=False)
print("Distance data saved to distance_data.csv")

# Rows at or above this total distance are treated as technical outliers and
# left out of the plot (the CSV above still contains them).
comp_query = 'total_distance < 15000'

# Study-page label -> tick text shown on the x axis. The key order is passed
# to seaborn as the box order, so each tick text is guaranteed to sit under
# the matching box even when a page is missing from the data or first appears
# out of sequence.
page_tick_labels = {
    "Study Page 1": "1 - BudgetBird",
    "Study Page 2": "2 - Hotel",
    "Study Page 3": "3 - UVV",
    "Study Page 4": "4 - Iceland",
    "Study Page 5": "5 - Rental",
    "Study Page 6": "6 - QuickDeliver",
}

# Boxplot of the total distance per study page.
plt.figure(figsize=(12, 6))
sns.boxplot(
    x='label',
    y='total_distance',
    data=distance_data.query(comp_query),
    order=list(page_tick_labels),
)
plt.xticks(
    list(range(len(page_tick_labels))),
    list(page_tick_labels.values()),
    rotation=45,
)
plt.xlabel('Study Page')
plt.ylabel('Total Distance Traveled (pixels)')
plt.title('Total Distance Traveled per Study Page')
plt.tight_layout()
# Save before show(): the figure may be cleared once the window is closed.
plt.savefig('distance_boxplot.png')
plt.show()
|