Scripts related to the Master's thesis "Exploring Sonification in Website Navigation on Smartphones".
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

100 lines
4.0 KiB

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy.stats as stats
from scipy.stats import f_oneway, friedmanchisquare, mannwhitneyu, ranksums
import numpy as np
import pingouin as pg
# --- Questionnaire scoring -------------------------------------------------
# SUS: odd-numbered items contribute (answer - 1), even-numbered items
# contribute (5 - answer); the sum is scaled by 2.5.
# NOTE: the result is stored under 'TotalIMIScore' even for SUS data because
# the rest of this script refers to that column name for both questionnaires.
df = pd.read_csv('QuestionnaireDataSUS.csv')
odd_item_sum = sum(df[f'Q{item}'] - 1 for item in (1, 3, 5, 7, 9))
even_item_sum = sum(5 - df[f'Q{item}'] for item in (2, 4, 6, 8, 10))
df['TotalIMIScore'] = (odd_item_sum + even_item_sum) * 2.5
# IMI (alternative data set; swap with the SUS lines above to analyse it)
#df = pd.read_csv('QuestionnaireDataIMI.csv')
#df['TotalIMIScore'] = (df['Q1'] + df['Q2'] + (8 - df['Q3']) + (8 - df['Q4']) + df['Q5'] + df['Q6'] + df['Q7']) / 7.0

# --- Descriptive statistics per web page ------------------------------------
grouped = df.groupby('WebpageID').agg(
    mean_IMIScore=('TotalIMIScore', 'mean'),
    std_IMIScore=('TotalIMIScore', 'std'),
    count=('TotalIMIScore', 'count'),
)
grouped['variance_IMIScore'] = grouped['std_IMIScore'] ** 2

# --- One-way ANOVA across web pages -----------------------------------------
# One array of scores per WebpageID, in the order produced by groupby.
anova_data = [group['TotalIMIScore'].values for name, group in df.groupby('WebpageID')]
anova_result = f_oneway(*anova_data)
print(f"ANOVA Result: F-statistic = {anova_result.statistic}, p-value = {anova_result.pvalue}")

# --- Repeated-measures checks and Friedman test ------------------------------
# Wide table: one row per participant, one column per web page; participants
# with a missing score for any page are dropped.
friedman_data = df.pivot(index='ParticipantID', columns='WebpageID', values='TotalIMIScore').dropna()
spher, W, chisq, dof, pval = pg.sphericity(data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID')
gg = pg.epsilon(data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID', correction='gg')
print(gg)
friedman_result = friedmanchisquare(*[friedman_data[col] for col in friedman_data])
print(f"Friedman Test Result: Chi-square statistic = {friedman_result.statistic}, p-value = {friedman_result.pvalue}")

# --- OLS-based ANOVA table ----------------------------------------------------
model = ols('TotalIMIScore ~ C(WebpageID)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# --- Assumption checks --------------------------------------------------------
# Shapiro-Wilk per web page. Iterating over anova_data (instead of indexing
# positions 0..5) keeps this working when the data set has a different number
# of web pages.
for page_scores in anova_data:
    print(stats.shapiro(page_scores))
print(stats.levene(*anova_data))
print(spher, round(W, 5), round(chisq, 3), dof, round(pval, 3))

# --- Per-page score series used by the pairwise comparisons --------------------
group_1, group_2, group_3, group_4, group_5, group_6 = (
    df[df['WebpageID'] == webpage_id]['TotalIMIScore'] for webpage_id in range(1, 7)
)
def mann_whitney_test(group_a, group_b):
    """Run a Mann-Whitney U test and derive a z-score and effect size r.

    The z-score is the normal approximation ``(U - mu_U) / sigma_U`` (no
    continuity correction), where ``sigma_U`` includes the standard tie
    correction so that tied observations do not inflate the variance.
    The effect size is ``r = z / sqrt(n1 + n2)``.

    Args:
        group_a: Sequence of numeric observations for the first sample.
        group_b: Sequence of numeric observations for the second sample.

    Returns:
        Tuple ``(u_statistic, p_value, z_value, effect_size_r)``.
        ``u_statistic`` and ``p_value`` come straight from
        ``scipy.stats.mannwhitneyu`` with its default settings. ``z_value``
        and ``effect_size_r`` are 0.0 when U has zero variance (all pooled
        values identical) and NaN when either group is empty.
    """
    u_statistic, p_value = mannwhitneyu(group_a, group_b)
    n1 = len(group_a)
    n2 = len(group_b)
    n_total = n1 + n2

    if n1 == 0 or n2 == 0:
        # No observations in one sample: z and r are undefined.
        return u_statistic, p_value, float('nan'), float('nan')

    mu_u = n1 * n2 / 2

    # Tie correction: for every distinct pooled value occurring t times,
    # subtract (t^3 - t) / (N * (N - 1)) from (N + 1) in the variance of U.
    pooled = np.concatenate([np.asarray(group_a), np.asarray(group_b)])
    _, tie_counts = np.unique(pooled, return_counts=True)
    tie_term = float(np.sum(tie_counts ** 3 - tie_counts))
    tie_adjustment = tie_term / (n_total * (n_total - 1))
    variance_u = n1 * n2 / 12 * ((n_total + 1) - tie_adjustment)
    # Guard against a tiny negative value caused by floating-point rounding.
    sigma_u = np.sqrt(max(variance_u, 0.0))

    if sigma_u > 0:
        z_value = (u_statistic - mu_u) / sigma_u
    else:
        # All pooled observations are identical, so U equals its mean.
        z_value = 0.0
    effect_size_r = z_value / np.sqrt(n_total)
    return u_statistic, p_value, z_value, effect_size_r
def wilcoxon_rank_sum_test(group_a, group_b):
    """Run a Wilcoxon rank-sum test and derive the effect size r.

    Args:
        group_a: Sequence of numeric observations for the first sample.
        group_b: Sequence of numeric observations for the second sample.

    Returns:
        Tuple ``(statistic, p_value, effect_size_r)`` where ``statistic`` and
        ``p_value`` are the values returned by ``scipy.stats.ranksums`` and
        ``effect_size_r`` is ``statistic / sqrt(len(group_a) + len(group_b))``.
    """
    outcome = ranksums(group_a, group_b)
    statistic, p_value = outcome
    total_n = len(group_a) + len(group_b)
    return statistic, p_value, statistic / np.sqrt(total_n)
# Pairwise web-page contrasts to evaluate: (label, first sample, second sample).
comparisons = [
    ("2 vs 5", group_2, group_5),
    ("2 vs 3", group_2, group_3),
    ("3 vs 5", group_3, group_5),
    ("1 vs 6", group_1, group_6),
    ("4 vs 6", group_4, group_6),
    ("1 vs 4", group_1, group_4),
]

# Repeated-measures ANOVA with WebpageID as the within-subject factor.
res = pg.rm_anova(data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID', detailed=True)
print(res)

# NOTE(review): rm_anova above treats the scores as repeated measures per
# ParticipantID, while the pairwise tests below are independent-sample
# tests - confirm that this is intended.
# Column names in the order the two test helpers return their values.
result_columns = (
    'Comparison',
    'Mann-Whitney U Statistic',
    'Mann-Whitney p-value',
    'Mann-Whitney Z-value',
    'Mann-Whitney Effect Size (r)',
    'Wilcoxon Rank-Sum Statistic',
    'Wilcoxon p-value',
    'Wilcoxon Effect Size (r)',
)
# One row per contrast: label, then the Mann-Whitney outputs, then the
# Wilcoxon rank-sum outputs.
results = [
    dict(zip(
        result_columns,
        (label, *mann_whitney_test(first, second), *wilcoxon_rank_sum_test(first, second)),
    ))
    for label, first, second in comparisons
]
results_df = pd.DataFrame(results)
print(results_df)