From 80d6d47003d08046cc8fccc67ec25fc0bed3eecf Mon Sep 17 00:00:00 2001 From: DART Date: Wed, 30 Oct 2024 16:39:00 +0000 Subject: [PATCH] adding defined tables --- analysis/dataset_definition_core.py | 4 +--- analysis/report.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/analysis/dataset_definition_core.py b/analysis/dataset_definition_core.py index c1d8b11..c40df8f 100644 --- a/analysis/dataset_definition_core.py +++ b/analysis/dataset_definition_core.py @@ -67,6 +67,4 @@ dataset.sex = patients.sex -dataset.dob = patients.date_of_birth - - +dataset.dob = patients.date_of_birth \ No newline at end of file diff --git a/analysis/report.py b/analysis/report.py index 0f80a5b..192f3ff 100644 --- a/analysis/report.py +++ b/analysis/report.py @@ -8,11 +8,11 @@ os.makedirs(output_dir, exist_ok=True) # Load the data -data = pd.read_csv("output/full_dataset.csv.gz") +data = pd.read_csv("output/full_dataset_test.csv.gz") # Melt the ADHD events data adhd_events = data.melt( - id_vars=['patient_id', 'sex'], + id_vars=['patient_id', 'sex','dob'], value_vars=[f'num_adhd_events_{year}' for year in range(2016, 2024)], var_name='year', value_name='num_adhd_events' @@ -20,7 +20,17 @@ #Computing ADHD disgnosis +#First this is the total adhd_total_sex_table = data.groupby(['sex']).count() -adhd_total_sex_table = adhd_total_sex_table.drop(['patient_id'],axis=1) +adhd_total_sex_table = adhd_total_sex_table.drop(['patient_id','dob'],axis=1) -adhd_dia_sex_table = data. 
\ No newline at end of file +#Second, the number of ADHD diagnoses +#Need to binarise the diagnosis counts +adhd_dia_sex_table = data.copy() +col_years = [f'num_adhd_events_{year}' for year in range(2016, 2024)] +adhd_dia_sex_table[col_years] = adhd_dia_sex_table[col_years] > 0 +adhd_dia_sex_table = adhd_dia_sex_table.groupby(['sex']).sum() +adhd_dia_sex_table = adhd_dia_sex_table.drop(['patient_id','dob'],axis=1) + +#Calculate the prevalence +prevelnce = adhd_dia_sex_table/adhd_total_sex_table \ No newline at end of file