import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy.stats as stats
from scipy.stats import f_oneway, friedmanchisquare, mannwhitneyu, ranksums
import numpy as np
import pingouin as pg

# ---------------------------------------------------------------------------
# Questionnaire scoring
# ---------------------------------------------------------------------------
# SUS: odd-numbered items contribute (answer - 1), even-numbered items
# contribute (5 - answer); the ten contributions are summed and scaled by 2.5.
# Assumes answers are on a 1-5 scale -- TODO confirm against the CSV.
# NOTE: the result is stored in the column 'TotalIMIScore' even for SUS data so
# that the rest of the script works unchanged for either questionnaire.
df = pd.read_csv('QuestionnaireDataSUS.csv')
df['TotalIMIScore'] = sum(
    (df[f'Q{item}'] - 1) if item % 2 else (5 - df[f'Q{item}'])
    for item in range(1, 11)
) * 2.5

# IMI: alternative data set. To analyse it, comment out the SUS block above
# and uncomment the two lines below (items 3 and 4 are reverse-scored as
# 8 - answer; the score is the mean over the seven items).
#df = pd.read_csv('QuestionnaireDataIMI.csv')
#df['TotalIMIScore'] = (df['Q1'] + df['Q2'] + (8 - df['Q3']) + (8 - df['Q4']) + df['Q5'] + df['Q6'] + df['Q7']) / 7.0

# ---------------------------------------------------------------------------
# Descriptive statistics per webpage
# ---------------------------------------------------------------------------
grouped = df.groupby('WebpageID').agg(
    mean_IMIScore=('TotalIMIScore', 'mean'),
    std_IMIScore=('TotalIMIScore', 'std'),
    count=('TotalIMIScore', 'count'),
)
grouped['variance_IMIScore'] = grouped['std_IMIScore'] ** 2

# ---------------------------------------------------------------------------
# Omnibus tests across webpages
# ---------------------------------------------------------------------------
# One array of scores per WebpageID, in groupby (sorted key) order.
anova_data = [
    page_scores.values
    for _, page_scores in df.groupby('WebpageID')['TotalIMIScore']
]

# One-way ANOVA across the per-page score arrays.
anova_result = f_oneway(*anova_data)
print(f"ANOVA Result: F-statistic = {anova_result.statistic}, p-value = {anova_result.pvalue}")

# Wide table (participants x webpages); participants with any missing page are
# dropped because the Friedman test needs complete rows.
friedman_data = df.pivot(
    index='ParticipantID', columns='WebpageID', values='TotalIMIScore'
).dropna()

# Sphericity test result (printed later in the script) and the
# Greenhouse-Geisser epsilon for the within-subject factor.
spher, W, chisq, dof, pval = pg.sphericity(
    data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID'
)
gg = pg.epsilon(
    data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID',
    correction='gg',
)
print(gg)

friedman_result = friedmanchisquare(*(friedman_data[page] for page in friedman_data))
print(f"Friedman Test Result: Chi-square statistic = {friedman_result.statistic}, p-value = {friedman_result.pvalue}")

# OLS model with WebpageID as a categorical predictor, summarised as a
# type-2 ANOVA table.
model = ols('TotalIMIScore ~ C(WebpageID)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# Shapiro-Wilk test for every webpage group. Looping (instead of indexing
# groups 0..5 explicitly) keeps this working for any number of webpages.
for page_scores in anova_data:
    print(stats.shapiro(page_scores))
def mann_whitney_test(group_a, group_b):
    """Run a Mann-Whitney U test and derive a z-score and effect size r.

    Args:
        group_a: Scores of the first group (any 1-D sequence / Series).
        group_b: Scores of the second group.

    Returns:
        Tuple ``(u_statistic, p_value, z_value, effect_size_r)`` where
        ``u_statistic`` and ``p_value`` come straight from
        ``scipy.stats.mannwhitneyu``, ``z_value`` is the normal approximation
        of U using the tie-corrected standard deviation (no continuity
        correction), and ``effect_size_r`` is ``z_value / sqrt(n1 + n2)``.
        ``z_value`` and ``effect_size_r`` are NaN if either group is empty.
    """
    u_statistic, p_value = mannwhitneyu(group_a, group_b)
    n1 = len(group_a)
    n2 = len(group_b)
    if n1 == 0 or n2 == 0:
        # No observations to rank against each other: z and r are undefined.
        return u_statistic, p_value, np.nan, np.nan

    n_total = n1 + n2
    mu_u = n1 * n2 / 2

    # Questionnaire scores are discrete, so tied values are expected. Without
    # the tie term the variance of U is overestimated and z is understated.
    _, tie_counts = np.unique(np.concatenate([group_a, group_b]), return_counts=True)
    tie_term = float(np.sum(tie_counts ** 3 - tie_counts))
    sigma_u = np.sqrt(
        n1 * n2 / 12 * ((n_total + 1) - tie_term / (n_total * (n_total - 1)))
    )

    if sigma_u == 0:
        # Every observation is identical, so U equals its mean: no effect.
        z_value = 0.0
    else:
        z_value = (u_statistic - mu_u) / sigma_u
    effect_size_r = z_value / np.sqrt(n_total)
    return u_statistic, p_value, z_value, effect_size_r


def wilcoxon_rank_sum_test(group_a, group_b):
    """Run a Wilcoxon rank-sum test and derive the effect size r.

    Args:
        group_a: Scores of the first group.
        group_b: Scores of the second group.

    Returns:
        Tuple ``(rank_sum_statistic, p_value, effect_size_r)`` where the first
        two values come from ``scipy.stats.ranksums`` and ``effect_size_r`` is
        ``rank_sum_statistic / sqrt(len(group_a) + len(group_b))``.
    """
    rank_sum_statistic, p_value = ranksums(group_a, group_b)
    effect_size_r = rank_sum_statistic / np.sqrt(len(group_a) + len(group_b))
    return rank_sum_statistic, p_value, effect_size_r


# Homogeneity of variance across webpages, then the sphericity result that was
# computed earlier in the script.
print(stats.levene(*anova_data))
print(spher, round(W, 5), round(chisq, 3), dof, round(pval, 3))

# Score Series for webpages 1-6 (an empty Series if a page ID is absent).
group_1, group_2, group_3, group_4, group_5, group_6 = (
    df[df['WebpageID'] == page_id]['TotalIMIScore'] for page_id in range(1, 7)
)

# The page pairs to compare are hard-coded; only these six are tested.
# NOTE(review): p-values are reported uncorrected for multiple comparisons.
comparisons = [
    ("2 vs 5", group_2, group_5),
    ("2 vs 3", group_2, group_3),
    ("3 vs 5", group_3, group_5),
    ("1 vs 6", group_1, group_6),
    ("4 vs 6", group_4, group_6),
    ("1 vs 4", group_1, group_4),
]

results = []

# Repeated-measures ANOVA with WebpageID as the within-subject factor.
res = pg.rm_anova(
    data=df, within='WebpageID', dv='TotalIMIScore', subject='ParticipantID',
    detailed=True,
)
print(res)

# Run both non-parametric two-sample tests for every pair; one row per pair.
for label, group_a, group_b in comparisons:
    mw_u_statistic, mw_p_value, mw_z_value, mw_effect_size_r = mann_whitney_test(group_a, group_b)
    ws_rank_sum_statistic, ws_p_value, ws_effect_size_r = wilcoxon_rank_sum_test(group_a, group_b)
    results.append({
        'Comparison': label,
        'Mann-Whitney U Statistic': mw_u_statistic,
        'Mann-Whitney p-value': mw_p_value,
        'Mann-Whitney Z-value': mw_z_value,
        'Mann-Whitney Effect Size (r)': mw_effect_size_r,
        'Wilcoxon Rank-Sum Statistic': ws_rank_sum_statistic,
        'Wilcoxon p-value': ws_p_value,
        'Wilcoxon Effect Size (r)': ws_effect_size_r,
    })

results_df = pd.DataFrame(results)
print(results_df)