First script on looking at ADHD

opensafely · Dec 18, 2024 · 59f9f00 · 59f9f00
1 parent 4be15d9
commit 59f9f00
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 75 deletions.
diff --git a/analysis/dataset_definition_core.py b/analysis/dataset_definition_core.py
@@ -22,7 +22,7 @@
 is_female_or_male = patients.sex.is_in(["female", "male"])
 
 was_adult = (patients.age_on(start_date) >= 18) & (
-    patients.age_on(start_date) <= 100
+    patients.age_on(start_date) <= 120
 )
 
 was_alive = (
@@ -65,24 +65,6 @@
     # Set the attribute on the dataset
     setattr(dataset, attribute_name, num_adhd_events_year)
 
-for year in years:
-    start_date = f"{year}-01-01"
-    end_date = f"{year}-12-31"
-
-    # Construct the attribute name dynamically for each year
-    attribute_name = f"num_methylphenidate_prescription_{year}"
-
-    # Calculate the number of methyphenidate events for the given year
-    num_methylphenidate_prescription_year = medications.where(
-    medications.dmd_code.is_in(methylphenidate_codelist)
-    & medications.date.is_on_or_between(
-        start_date, end_date)
-    ).count_for_patient()
-
-    # Set the attribute on the dataset
-    setattr(dataset, attribute_name, num_methylphenidate_prescription_year)
-
 
 dataset.sex = patients.sex
-
-
+dataset.dob = patients.date_of_birth
diff --git a/analysis/report.py b/analysis/report.py
@@ -8,66 +8,32 @@
 os.makedirs(output_dir, exist_ok=True)
 
 # Load the data
-data = pd.read_csv("output/full_dataset.csv.gz")
+data = pd.read_csv("output/full_dataset_test.csv.gz")
 
 # Melt the ADHD events data
 adhd_events = data.melt(
-    id_vars=['patient_id', 'sex'],
+    id_vars=['patient_id', 'sex','dob'],
     value_vars=[f'num_adhd_events_{year}' for year in range(2016, 2024)],
     var_name='year',
     value_name='num_adhd_events'
 )
 
-# Extract the year from the 'year' column
-adhd_events['year'] = adhd_events['year'].str.extract(r'(\d{4})').astype(int)
+# Computing ADHD diagnosis
 
-# Group by year and sex, then sum the number of ADHD events
-adhd_events_grouped = adhd_events.groupby(['year', 'sex'])['num_adhd_events'].sum().reset_index()
+# First, the total: count of non-missing values per column, grouped by sex
+adhd_total_sex_table = data.groupby(['sex']).count()
+adhd_total_sex_table = adhd_total_sex_table.drop(['patient_id','dob'],axis=1)
 
-# Plot the ADHD events per year, per sex
-plt.figure(figsize=(12, 6))
-width = 0.35
-years = adhd_events_grouped['year'].unique()
-x = np.arange(len(years))
+# Second, the number of ADHD diagnoses
+# Need to binarise the yearly event counts (more than 0 events -> True)
+adhd_dia_sex_table = data.copy()
+col_years = [f'num_adhd_events_{year}' for year in range(2016, 2024)]
+adhd_dia_sex_table[col_years] = adhd_dia_sex_table[col_years] > 0
+adhd_dia_sex_table = adhd_dia_sex_table.groupby(['sex']).sum()
+adhd_dia_sex_table = adhd_dia_sex_table.drop(['patient_id','dob'],axis=1)
 
-for i, sex in enumerate(adhd_events_grouped['sex'].unique()):
-    subset = adhd_events_grouped[adhd_events_grouped['sex'] == sex]
-    plt.bar(x + (i * width - width / 2), subset['num_adhd_events'], width, label=sex)
+# Calculate the prevalence
+prevelnce = adhd_dia_sex_table/adhd_total_sex_table
 
-plt.xlabel('Year')
-plt.ylabel('Number of ADHD Events')
-plt.title('Number of ADHD Events per Year by Sex')
-plt.xticks(x, years)
-plt.legend(title='Sex')
-plt.grid(True, axis='y')
-plt.savefig(os.path.join(output_dir, 'adhd_events_per_year_by_sex.png'))
-plt.close()
-
-# Melt the methylphenidate prescription data
-methylphenidate_prescriptions = data.melt(
-    id_vars=['patient_id', 'sex'],
-    value_vars=[f'num_methylphenidate_prescription_{year}' for year in range(2016, 2024)],
-    var_name='year',
-    value_name='num_methylphenidate_prescriptions'
-)
-
-# Extract the year from the 'year' column
-methylphenidate_prescriptions['year'] = methylphenidate_prescriptions['year'].str.extract(r'(\d{4})').astype(int)
-
-# Group by year and sex, then sum the number of methylphenidate prescriptions
-methylphenidate_prescriptions_grouped = methylphenidate_prescriptions.groupby(['year', 'sex'])['num_methylphenidate_prescriptions'].sum().reset_index()
-
-# Plot the methylphenidate prescriptions per year, per sex
-plt.figure(figsize=(12, 6))
-for i, sex in enumerate(methylphenidate_prescriptions_grouped['sex'].unique()):
-    subset = methylphenidate_prescriptions_grouped[methylphenidate_prescriptions_grouped['sex'] == sex]
-    plt.bar(x + (i * width - width / 2), subset['num_methylphenidate_prescriptions'], width, label=sex)
-
-plt.xlabel('Year')
-plt.ylabel('Number of Methylphenidate Prescriptions')
-plt.title('Number of Methylphenidate Prescriptions per Year by Sex')
-plt.xticks(x, years)
-plt.legend(title='Sex')
-plt.grid(True, axis='y')
-plt.savefig(os.path.join(output_dir, 'methylphenidate_prescriptions_per_year_by_sex.png'))
-plt.close()
+# Save the prevalence table
+prevelnce.to_csv('output/results_saved.csv')
diff --git a/project.yaml b/project.yaml
@@ -6,15 +6,14 @@ expectations:
 
 actions:
   generate_dataset_full:
-    run: ehrql:v1 generate-dataset analysis/dataset_definition_core.py --output output/full_dataset.csv.gz
+    run: ehrql:v1 generate-dataset analysis/dataset_definition_core.py --output output/full_dataset_test.csv.gz
     outputs:
       highly_sensitive:
-        full_dataset: output/full_dataset.csv.gz
+        full_dataset: output/full_dataset_test.csv.gz
 
   generate_charts:
     run: python:v2 python analysis/report.py
     needs: [generate_dataset_full]
     outputs:
       moderately_sensitive:
-        chart1: output/adhd_events_per_year_by_sex.png
-        chart2: output/methylphenidate_prescriptions_per_year_by_sex.png
+        table1: output/results_saved.csv