-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdashboard.py
460 lines (388 loc) · 19 KB
/
dashboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import plotly.express as px
# Module-presentation codes offered in the "Select Course" box of the
# histogram view.
mod_pres_list = [
    'CCC_2014B',
    'CCC_2014J',
    'DDD_2013B',
    'DDD_2013J',
    'DDD_2014B',
    'DDD_2014J',
]

# Module-presentation codes offered in the "Select Course" box of the
# assessment and engagement views.
mod_pres_list_task2 = [
    'CCC_2014B',
    'DDD_2013B',
    'DDD_2014B',
    'CCC_2014J',
]

# Maps the label shown in the "Select a column" box to the dataframe column
# that holds the attribute. 'Overall' maps to None, meaning "no grouping".
col_dict = dict([
    ('Gender', 'gender'),
    ('Disability', 'disability'),
    ('Previous education', 'educ_band'),
    ('Age', 'age_band'),
    ('Is repeating', 'is_repeating'),
    ('Other credits', 'credits_other_band'),
    ('IMD', 'imd_2'),
    ('Overall', None),
])
def plot_per_group(df, att=None, outcome="score", density=True):
    """Build a matplotlib figure showing the distribution of ``outcome``.

    As a side effect this renders Streamlit select boxes that let the user
    pick the graph type and, for the two histogram types, the fill style and
    whether counts are normalised.

    Args:
        df: DataFrame holding the ``outcome`` column and, when ``att`` is
            given, the ``att`` column.
        att: Column to group by; each group becomes its own series. With
            ``None`` all rows are plotted together and the title reads
            'Overall'.
        outcome: Name of the column whose values are plotted.
        density: Not read by this function; normalisation is chosen through
            the "Normalise the counts" select box instead.

    Returns:
        The matplotlib figure that was drawn on.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Without a grouping attribute the whole frame is plotted as one series.
    if att is None:
        att = 'Overall'
    else:
        df = df.groupby(att)

    graph_kind = st.selectbox(
        "Type of graph",
        ['Histogram', 'Cumulative histogram', 'Density plot'],
    )

    if graph_kind == 'Density plot':
        y_legend = "Density"
        # Evaluate the kernel density estimate on the integer scores 0..100.
        df[outcome].plot.kde(
            ax=ax,
            legend=True,
            ind=range(0, 101),
            bw_method=0.3,
        )
    else:
        # Plain histogram: ten-point bands. Cumulative: one bin per point.
        if graph_kind == 'Cumulative histogram':
            cumulative = 1
            bins = range(0, 101)
        else:
            cumulative = 0
            bins = (0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 101)

        fill_choice = st.selectbox("Type of fil", ['Outline only', 'Filled'])
        normalise_choice = st.selectbox("Normalise the counts", ['YES', 'NO'])

        fill_type = 'stepfilled' if fill_choice == 'Filled' else 'step'

        if normalise_choice == 'YES':
            density_inner_flag = 1
            y_legend = "Proportion of students for a given exam band."
        else:
            density_inner_flag = 0
            y_legend = "Frequency - number of students for a given exam band"

        df[outcome].hist(
            density=density_inner_flag,
            bins=bins,
            alpha=0.5,
            ax=ax,
            legend=True,
            histtype=fill_type,
            cumulative=cumulative,
            lw=3,
        )

    ax.set_title('Histogram - ' + att)
    ax.set_xlabel("Exam score")
    ax.set_ylabel(y_legend)
    ax.grid(True, linestyle='--', alpha=0.8)
    return fig
def show_correlations_tma(df, assessment_types='both', precision=4):
    """Render the correlation matrix between assessment scores and the exam.

    Args:
        df: DataFrame with one column per assessment ('TMA 1'..'TMA 6',
            'CMA 1'..'CMA 7') and an 'exam' column.
        assessment_types: Which assessment columns to correlate: 'TMA',
            'CMA' or 'both'. The 'exam' column is always included.
        precision: Number of decimals used for rounding and display.

    Raises:
        ValueError: If ``assessment_types`` is not one of the supported
            values.
    """
    tma_columns = ['TMA 1', 'TMA 2', 'TMA 3', 'TMA 4', 'TMA 5', 'TMA 6']
    cma_columns = ['CMA 1', 'CMA 2', 'CMA 3', 'CMA 4', 'CMA 5', 'CMA 6', 'CMA 7']
    columns_by_type = {
        'TMA': tma_columns + ['exam'],
        'CMA': cma_columns + ['exam'],
        'both': tma_columns + cma_columns + ['exam'],
    }
    # Fail with a clear message instead of hitting an unbound local later on.
    if assessment_types not in columns_by_type:
        raise ValueError(
            "assessment_types must be one of "
            + ", ".join(repr(key) for key in columns_by_type)
            + "; got " + repr(assessment_types)
        )
    asssessments = columns_by_type[assessment_types]

    st.header('TMA Correlations')

    df_corr = (df
        [asssessments]
        .corr()
        .round(precision)
        # Drop rows/columns that are entirely NaN in the correlation matrix.
        .dropna(axis='index', how='all')
        .dropna(axis='columns', how='all')
        .style
        .format(precision=precision)
        .background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
    )
    st.write(df_corr)
    st.write("Each value in the table is a Pearson correlation coefficient between the two variables.")
    st.write("- exam = Exam Score")
    st.write("- TMA 1-6 = The score in the Tutor Marked Assignment")
    st.write("- CMA 1-7 = The score in the Computer Marked Assignment (Quiz)")
def show_registrations(df_reg):
    """Render a line chart of weekly student registrations.

    A "Normalise the counts" select box switches between the raw number of
    registered students and that number as a percentage of the maximum
    weekly registration count in ``df_reg``.

    Args:
        df_reg: DataFrame with a 'week' column and a
            'num_students_registered' column.
    """
    st.header('Registrations')
    max_reg = df_reg.num_students_registered.max()
    df_reg = (df_reg
        .set_index('week')
        .assign(pct_registered_student=lambda df_: (df_.num_students_registered / max_reg) * 100)
    )

    density_inner = st.selectbox("Normalise the counts", ['YES', 'NO'])
    normalised = density_inner == "YES"
    if normalised:
        col = 'pct_registered_student'
        y_label = '% students registered in a given week.'
    else:
        col = 'num_students_registered'
        y_label = 'Number of students registered in a given week.'

    df_to_plot = df_reg[[col]].reset_index()
    # Plot only the selected measure. Passing every column as ``y`` would
    # also include 'week', which is already the x axis.
    fig = px.line(df_to_plot, x='week', y=[col], labels={'value': y_label})
    # The fixed 0-100 range only makes sense for percentages; raw counts
    # keep Plotly's automatic range so they are not clipped.
    if normalised:
        fig.update_yaxes(range=[0, 100])
    st.plotly_chart(fig, key="regisration", on_select="rerun")
def show_engagement(df_vle, df_vle_demog, df_reg):
    """Render the VLE engagement view for the course picked in the sidebar.

    Draws weekly activity per activity type, then (unless 'Overall' is
    selected) weekly activity per value of the chosen student attribute, and
    finally the registrations chart for the same course.

    Args:
        df_vle: Weekly activity per activity type, with 'mod_pres' and
            'week' columns plus one column per activity type.
        df_vle_demog: Weekly activity per attribute value, with 'mod_pres',
            'col', 'week', 'val' and 'pct_students' columns.
        df_reg: Weekly registrations with a 'mod_pres' column; the filtered
            frame is passed on to ``show_registrations``.
    """
    st.header("VLE Engagement")
    mod_pres = st.sidebar.selectbox("Select Course", mod_pres_list_task2)

    # Restrict both inputs to the selected course.
    df_vle_filt = df_vle.loc[df_vle.mod_pres == mod_pres].drop(columns=['mod_pres'])
    df_reg_filt = df_reg.loc[df_reg.mod_pres == mod_pres]

    # Display names for the raw activity-type columns.
    activity_labels = {
        'oucollaborate': 'Webinar',
        'ouelluminate': 'Webinar',
        'resource': 'PDF Download',
        'forumng': 'Forum',
        'oucontent': 'HTML content',
        'url': 'External link',
        'homepage': 'Homepage',
        'quiz': 'Quiz',
        'externalquiz': 'Quiz',
        'ouwiki': 'Wiki',
        'glossary': 'Glossary',
    }
    # Activity types that are left out of the chart when present.
    hidden_activities = ['dataplus', 'page', 'sharedsubpage', 'dualpane', 'folder', 'subpage']

    weekly = df_vle_filt.set_index(['week'])
    weekly = weekly.dropna(axis='columns', how='all')
    weekly = weekly.rename(columns=activity_labels).fillna(0)
    df_to_plot = weekly.drop(columns=hidden_activities, errors='ignore')

    activity_fig = px.line(
        df_to_plot.reset_index(),
        x='week',
        y=df_to_plot.columns,
        labels={'value': '% students with the activity, out of all registered in that week. ',
                'variable': 'Activity Type'},
    )
    activity_fig.update_yaxes(range=[0, 1])
    st.plotly_chart(activity_fig, key="vle_at", on_select="rerun", theme=None,)

    st.write("Overall percentage of students in VLE per demographic group.")
    selected_column_name = get_selected_column()
    col = col_dict[selected_column_name]

    if selected_column_name != 'Overall':
        # Keep the rows of the selected course and attribute, then pivot so
        # that every attribute value becomes its own column of pct_students.
        per_course = df_vle_demog.loc[df_vle_demog.mod_pres == mod_pres]
        per_attribute = per_course.loc[per_course.col == col]
        pivoted = (per_attribute
            .drop(columns=['mod_pres', 'col'])
            .set_index(['week', 'val'])
            .unstack()
        )
        df_vle_demog_filt = pivoted['pct_students']

        group_fig = px.line(
            df_vle_demog_filt.reset_index(),
            x='week',
            y=df_vle_demog_filt.columns,
            labels={'value': '% students with the activity, out of all registered in that week. ',
                    'variable': 'Attribute Value'},
        )
        group_fig.update_yaxes(range=[0, 1])
        st.plotly_chart(group_fig, theme=None, key="vle_grp", on_select="rerun")

    show_registrations(df_reg_filt)
def show_assessment_difficulty(df, df_ass, df_ass_stats, precision=2):
    """Render per-assessment statistics and average scores per attribute.

    Args:
        df: Student-level frame with one score column per assessment
            ('TMA 1'..'TMA 6', 'CMA 1'..'CMA 7', 'exam').
        df_ass: Assessment metadata indexed by assessment name; supplies the
            'date', 'week' and 'weight' columns of the table.
        df_ass_stats: Assessment statistics indexed by assessment name;
            supplies 'pct_submitted' and 'pct_late'.
        precision: Number of decimals used for rounding and display.
    """
    st.header('TMA difficulty')
    asssessments = ['TMA 1', 'TMA 2', 'TMA 3', 'TMA 4', 'TMA 5', 'TMA 6',
                    'CMA 1', 'CMA 2', 'CMA 3', 'CMA 4', 'CMA 5', 'CMA 6', 'CMA 7',
                    'exam']

    # Mean score per assessment; assessments without any score are dropped.
    avg_scores = df[asssessments].mean().dropna().round(precision)
    avg_scores = avg_scores.rename('Average Score').reset_index()

    # Submission statistics: rounded first, then rescaled from 0-1 to 0-100.
    unused_stats = ['week', 'num_students_registered', 'num_submitted',
                    'avg_score', 'score_std']
    stats = df_ass_stats.drop(columns=unused_stats).round(precision)
    stats = stats.assign(pct_late=lambda df_: df_.pct_late * 100,
                         pct_submitted=lambda df_: df_.pct_submitted * 100)

    # Join on the assessment name, which reset_index() put in 'index'.
    combined = avg_scores.merge(df_ass, left_on=['index'], right_index=True)
    combined = combined.merge(stats, left_on=['index'], right_index=True)
    combined = combined.set_index(['index']).sort_values(by=['date'])
    df_res_all = combined[['date', 'week', 'weight', 'pct_submitted', 'pct_late', 'Average Score']]
    st.write(df_res_all)

    explanation = (
        "Columns explanation",
        "- date = deadline of the assignment as the numbet of days from the course start",
        "- week = week of the assignment deadline",
        "- weight [0-100] = how much the score contributes to the overall course assessmnet score (OCAS)",
        "- pct_submitted [0-100] = percentage of students that submitted the assignment, out of number of students that were registered at the date of the deadline",
        "- pct_late [0-100] = percentage of students that submitted the assignment after the deadline, out of students that submitted.",
        "- Average score [0-100] = mean score achieved by students, out of students that submitted. Not-submitted are not counted in the average.",
    )
    for line in explanation:
        st.write(line)

    st.header('-- Average Score per student factor')
    selected_column_name = get_selected_column()
    col = col_dict[selected_column_name]

    if selected_column_name != 'Overall':
        # One row per assessment, one column per attribute value.
        per_group = df.groupby(col)[asssessments].mean().transpose()
        per_group = per_group.dropna(axis='index', how='all').add_prefix('Average Score ')
        df_res = (per_group.style
            .format(precision=precision)
            .background_gradient(axis=None, vmin=0, vmax=100, cmap="YlGnBu")
        )
        st.write(df_res)

    show_student_counts(df, col)
def show_assessment_correlations(df, df_ass, df_ass_stats):
    """Render the assessment view for the course picked in the sidebar.

    Filters all three inputs to the selected module presentation, indexes
    the two assessment frames by assessment name, and hands them to the
    correlation and difficulty sections.

    Args:
        df: Student-level frame with a 'mod_pres' column.
        df_ass: Assessment metadata with 'mod_pres', 'code_module',
            'code_presentation' and 'assessment_name' columns.
        df_ass_stats: Assessment statistics with 'mod_pres' and
            'assessment_name' columns.
    """
    st.header('Assessment Correlations')
    mod_pres = st.sidebar.selectbox("Select Course", mod_pres_list_task2)

    df_filt = df.loc[df.mod_pres == mod_pres]

    ass_rows = df_ass.loc[df_ass.mod_pres == mod_pres]
    ass_rows = ass_rows.drop(columns=['code_module', 'code_presentation', 'mod_pres'])
    df_ass_filt = ass_rows.set_index(['assessment_name'])

    stats_rows = df_ass_stats.loc[df_ass_stats.mod_pres == mod_pres]
    stats_rows = stats_rows.drop(columns=['mod_pres'])
    df_ass_stats_filt = stats_rows.set_index(['assessment_name'])

    show_correlations_tma(df_filt)
    show_assessment_difficulty(df_filt, df_ass_filt, df_ass_stats_filt)
def show_correlations(df, precision=4):
    """Render semester-score vs exam-score correlations per course.

    Shows one correlation per module presentation and, when a student
    attribute other than 'Overall' is selected, one correlation per module
    presentation and attribute value.

    Args:
        df: Student-level frame with 'mod_pres', 'ocas' and 'exam' columns
            plus the attribute columns listed in ``col_dict``.
        precision: Number of decimals used for rounding and display in both
            tables.
    """
    st.header('Correlations')
    st.markdown("""Each value in the two tables below is a [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) between the semester score that the student gained and the exam score.
- **semester_score** = Weighted average of assignments accumulated during the semester.
- **exam** = Exam Score
""")
    df_corr = (df
        .groupby(['mod_pres'])
        [['ocas', 'exam']]
        .corr()
        .round(precision)
        .dropna(axis='index', how='all')
        .dropna(axis='columns', how='all')
        .reset_index()
        .drop(columns=['ocas'])
        .set_index('mod_pres')
        [['exam']]
        # Drops the exam-vs-exam entry of each 2x2 matrix, which is 1.0.
        # NOTE(review): a rounded ocas/exam correlation of exactly 1.0 would
        # be dropped as well.
        .loc[lambda df_: df_.exam < 1.0]
        .style
        .format(precision=precision)
        .background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
    )
    st.write('Overall correlation between the semester score and the exam score.')
    st.write(df_corr)

    st.write("**Correlations per student factors**")
    selected_column_name = get_selected_column()
    col = col_dict[selected_column_name]

    # 'Overall' maps to None, which is not a column that can be grouped by;
    # the per-factor table is skipped in that case, as in the other views.
    if col is not None:
        df_corr_col = (df
            .groupby(['mod_pres', col])
            [['ocas', 'exam']]
            .corr()
            .round(precision)
            ['exam']
            # Same filter as above: remove the exam-vs-exam entries.
            .loc[lambda df_: df_ < 1.0]
            .unstack().unstack()['ocas']
            .style
            .format(precision=precision)
            .background_gradient(axis=None, vmin=0, vmax=1, cmap="YlGnBu")
        )
        st.write(df_corr_col)

    show_student_counts(df, col)
def show_student_counts(df, col):
    """Write the total number of rows and, optionally, counts per group.

    Args:
        df: Student-level frame; a 'mod_pres' column is used as an extra
            grouping level when it is present.
        col: Column to count by, or ``None`` to show only the overall total.
    """
    st.write("**Student Counts per selected characteristic**")
    counts_all = "Overall Number of students: " + str(len(df))
    st.write(counts_all)

    if col is None:
        return

    if 'mod_pres' in df.columns:
        # Rows are module presentations, columns are the attribute values.
        counts = df.groupby(['mod_pres', col]).size().unstack()
    else:
        # A single-level result cannot be unstacked (Series.unstack needs a
        # MultiIndex), so build the one-row table explicitly to keep the
        # attribute values as columns.
        counts = df.groupby(col).size().rename('Number of students').to_frame().T
    st.write(counts)
def get_selected_column():
    """Render the "Select a column" box and return the chosen label.

    The returned label is one of the keys of ``col_dict``.
    """
    attribute_labels = [
        'Overall',
        'Gender',
        'Is repeating',
        'Disability',
        'Previous education',
        'Age',
        'Other credits',
        'IMD',
    ]
    chosen_label = st.selectbox("Select a column", attribute_labels)
    return chosen_label
def show_histograms(df):
    """Render the exam-score distribution view for a selected course.

    The course is picked in the sidebar and the optional grouping attribute
    in the main area. Below the chart the number of students is shown, in
    total and (unless 'Overall' is selected) per attribute value.

    Args:
        df: Student-level frame with 'mod_pres' and 'exam' columns plus the
            attribute columns listed in ``col_dict``.
    """
    st.header('Histograms')
    mod_pres = st.sidebar.selectbox("Select Course", mod_pres_list)
    selected_column_name = get_selected_column()
    col = col_dict[selected_column_name]

    df_filt = df.loc[lambda df_: df_.mod_pres == mod_pres]

    fig = plot_per_group(df_filt, att=col, outcome="exam", density=True)
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed; release it
    # once rendered so figures do not pile up across Streamlit reruns.
    plt.close(fig)

    st.header("Count of students")
    counts_all = "Overall Number of students: " + str(len(df_filt))
    st.write(counts_all)
    if selected_column_name != 'Overall':
        counts = df_filt.groupby(col).size().rename('Number of students')
        st.write(counts)
def show_data_overview(df):
    """Render the data description and a table of student counts.

    The table has one row per module presentation with the total number of
    students followed by the counts for every value of each student
    attribute (gender, age, repeating, disability, previous education,
    other credits and IMD).

    Args:
        df: Student-level frame with a 'mod_pres' column and the attribute
            columns 'gender', 'age_band', 'is_repeating', 'disability',
            'educ_band', 'credits_other_band' and 'imd_2'.
    """
    st.header('Data Overview')
    # The backslash before each '%' is written as '\\' so the text passed to
    # Streamlit is unchanged while avoiding an invalid escape sequence.
    st.markdown("""
**Data description**
The data describes two course CCC and DDD from OULAD dataset, in 4 different presntations (runs). For CCC it is 2014B and 2014J. For DDD it is 2013B, 2013J, 2014B and 2014J.
- **B** - the course starts in February
- **J** - the course starts in October
Both courses are from the STEM area, and they have a duration more than 30 weeks. Students need to gain enough points during the semester and then pass the exam to pass the whole course.
**Attributes**
- **Is repeating** - New=New student, Repeating=student already tried to take this module in the past but did not finish it (it might be early or late withdrawal, failing the exam)
- **Disability** - Flag whether the student has declared any disability (Y/N)
- **Gender** - Male or Female
- **Previous education** - Highest education level achieved before the start of the course banded into three categories, based on A-level (similar to maturita exam)
- **Age** - Age of the student at the start of the course banded into two categories (older or younger than 35 years)
- **Other credits** - Total number of enrolled credits in other than the studied course, banded into three categories - [0-1] = no other credits, [1-60) - less than 60, [60,600) - more than 60
- **IMD** - Index of multiple deprivation, banded into three categories - Q1_Q2 = 0\\%-40, indicating the highest poverty areas, Q3 = 40\\%-60\\%, indicating medium quality of the neighbourhood, Q4_Q5 = 60\\%-100\\%, indicating the most affluent areas and INT = missing IMD, indicating international students
**VLE activity types***
- **Homepage** - visiting a homepage of the course, usually any other activity happens after the homepage visit
- **Forum** - students discussing in an online forum, can be both reading or contribution
- **Webinar** - synchronous meeting using an online platform (such as Teams, Adobe connect, etc.)
- **Quiz** - computer graded quiz, can result in a score in CMA or if it is formattive, then it is just an indication for a student without any grading implication.
- **External link** - indicates click leading outside Moodle VLE (can be news, video platform, or some external learning system, ...)
- **PDF Download** - downloading of a PDF resource that can be then viewed offline and such activity cannot be tracked anymore
- **HTML content** - usually the main content of the course is presented in this way, includes structuring into sections and can contain images
- **Glossary** - can serve as a collaborative dictionary where students and teachers add new entries, could be viewing a Glossary, adding or editing a Glossary entry (word, term, or definition), or approving entries.
- **Wiki** - collaborative tool allowing multiple students to create, edit, and manage a collection of web pages. Could be group work, or building a knowledge base collectively. The activity can be viewing the Wiki page or creating or editing a Wiki entry.
""")
    st.write("**Student Counts**")

    # (attribute column, prefix for the resulting count columns), in the
    # order the column groups appear in the table. Other credits and IMD are
    # separate entries so neither overwrites the other.
    breakdowns = [
        ('gender', 'gender_'),
        ('age_band', 'age_'),
        ('is_repeating', 'repeat_'),
        ('disability', 'disability_'),
        ('educ_band', 'educ_'),
        ('credits_other_band', 'credits_'),
        ('imd_2', 'IMD_'),
    ]

    counts = pd.DataFrame(df.groupby(['mod_pres']).size().rename('Total number'))
    for column, prefix in breakdowns:
        per_value = (df.groupby(['mod_pres', column])
            .size()
            .unstack()
            .add_prefix(prefix)
        )
        counts = counts.merge(per_value, on=['mod_pres'])
    st.write(counts)
def main():
    """Load the CSV inputs and render the view selected in the sidebar."""
    df = pd.read_csv('./data/stud_course_all.csv')
    df_ass = pd.read_csv('./data/assessments.csv')
    df_ass_stats = pd.read_csv('./data/stud_ass_stats.csv')
    df_reg = pd.read_csv('./data/reg_stat_weekly_all.csv')
    df_vle = pd.read_csv('./data/vle_by_act_all.csv')
    df_vle_demog = pd.read_csv('./data/vle_weekly_demog_all.csv')

    st.sidebar.title("OULAD Visualisation")
    # "Correlations" is not offered in the sidebar, but its renderer stays
    # registered below.
    view_names = [
        "_Data Overview",
        "Task1_Histograms",
        "Task2_Assessments",
        "Task2_Engagement",
    ]
    option = st.sidebar.selectbox("Select View", view_names)

    # One renderer per view name; each closes over the frames it needs.
    renderers = {
        "_Data Overview": lambda: show_data_overview(df),
        "Correlations": lambda: show_correlations(df),
        "Task1_Histograms": lambda: show_histograms(df),
        "Task2_Assessments": lambda: show_assessment_correlations(df, df_ass, df_ass_stats),
        "Task2_Engagement": lambda: show_engagement(df_vle, df_vle_demog, df_reg),
    }
    render = renderers.get(option)
    if render is not None:
        render()
# Render the dashboard only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()