Skip to content

Commit

Permalink
First script on looking at ADHD
Browse files Browse the repository at this point in the history
  • Loading branch information
quan14 authored Dec 18, 2024
1 parent 4be15d9 commit 59f9f00
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 75 deletions.
22 changes: 2 additions & 20 deletions analysis/dataset_definition_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
is_female_or_male = patients.sex.is_in(["female", "male"])

was_adult = (patients.age_on(start_date) >= 18) & (
patients.age_on(start_date) <= 100
patients.age_on(start_date) <= 120
)

was_alive = (
Expand Down Expand Up @@ -65,24 +65,6 @@
# Set the attribute on the dataset
setattr(dataset, attribute_name, num_adhd_events_year)

for year in years:
start_date = f"{year}-01-01"
end_date = f"{year}-12-31"

# Construct the attribute name dynamically for each year
attribute_name = f"num_methylphenidate_prescription_{year}"

# Calculate the number of methylphenidate prescriptions for the given year
num_methylphenidate_prescription_year = medications.where(
medications.dmd_code.is_in(methylphenidate_codelist)
& medications.date.is_on_or_between(
start_date, end_date)
).count_for_patient()

# Set the attribute on the dataset
setattr(dataset, attribute_name, num_methylphenidate_prescription_year)


dataset.sex = patients.sex


dataset.dob = patients.date_of_birth
68 changes: 17 additions & 51 deletions analysis/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,66 +8,32 @@
os.makedirs(output_dir, exist_ok=True)

# Load the data
data = pd.read_csv("output/full_dataset.csv.gz")
data = pd.read_csv("output/full_dataset_test.csv.gz")

# Melt the ADHD events data
adhd_events = data.melt(
id_vars=['patient_id', 'sex'],
id_vars=['patient_id', 'sex','dob'],
value_vars=[f'num_adhd_events_{year}' for year in range(2016, 2024)],
var_name='year',
value_name='num_adhd_events'
)

# Extract the year from the 'year' column
adhd_events['year'] = adhd_events['year'].str.extract(r'(\d{4})').astype(int)
# Compute ADHD diagnoses

# Group by year and sex, then sum the number of ADHD events
adhd_events_grouped = adhd_events.groupby(['year', 'sex'])['num_adhd_events'].sum().reset_index()
# First, compute the totals per sex
adhd_total_sex_table = data.groupby(['sex']).count()
adhd_total_sex_table = adhd_total_sex_table.drop(['patient_id','dob'],axis=1)

# Plot the ADHD events per year, per sex
plt.figure(figsize=(12, 6))
width = 0.35
years = adhd_events_grouped['year'].unique()
x = np.arange(len(years))
# Second, compute the number of ADHD diagnoses per sex
# The yearly event counts need to be binarised (any event > 0) first
adhd_dia_sex_table = data.copy()
col_years = [f'num_adhd_events_{year}' for year in range(2016, 2024)]
adhd_dia_sex_table[col_years] = adhd_dia_sex_table[col_years] > 0
adhd_dia_sex_table = adhd_dia_sex_table.groupby(['sex']).sum()
adhd_dia_sex_table = adhd_dia_sex_table.drop(['patient_id','dob'],axis=1)

for i, sex in enumerate(adhd_events_grouped['sex'].unique()):
subset = adhd_events_grouped[adhd_events_grouped['sex'] == sex]
plt.bar(x + (i * width - width / 2), subset['num_adhd_events'], width, label=sex)
# Calculate the prevalence
prevelnce = adhd_dia_sex_table/adhd_total_sex_table

plt.xlabel('Year')
plt.ylabel('Number of ADHD Events')
plt.title('Number of ADHD Events per Year by Sex')
plt.xticks(x, years)
plt.legend(title='Sex')
plt.grid(True, axis='y')
plt.savefig(os.path.join(output_dir, 'adhd_events_per_year_by_sex.png'))
plt.close()

# Melt the methylphenidate prescription data
methylphenidate_prescriptions = data.melt(
id_vars=['patient_id', 'sex'],
value_vars=[f'num_methylphenidate_prescription_{year}' for year in range(2016, 2024)],
var_name='year',
value_name='num_methylphenidate_prescriptions'
)

# Extract the year from the 'year' column
methylphenidate_prescriptions['year'] = methylphenidate_prescriptions['year'].str.extract(r'(\d{4})').astype(int)

# Group by year and sex, then sum the number of methylphenidate prescriptions
methylphenidate_prescriptions_grouped = methylphenidate_prescriptions.groupby(['year', 'sex'])['num_methylphenidate_prescriptions'].sum().reset_index()

# Plot the methylphenidate prescriptions per year, per sex
plt.figure(figsize=(12, 6))
for i, sex in enumerate(methylphenidate_prescriptions_grouped['sex'].unique()):
subset = methylphenidate_prescriptions_grouped[methylphenidate_prescriptions_grouped['sex'] == sex]
plt.bar(x + (i * width - width / 2), subset['num_methylphenidate_prescriptions'], width, label=sex)

plt.xlabel('Year')
plt.ylabel('Number of Methylphenidate Prescriptions')
plt.title('Number of Methylphenidate Prescriptions per Year by Sex')
plt.xticks(x, years)
plt.legend(title='Sex')
plt.grid(True, axis='y')
plt.savefig(os.path.join(output_dir, 'methylphenidate_prescriptions_per_year_by_sex.png'))
plt.close()
#Need to save the table
prevelnce.to_csv('output/results_saved.csv')
7 changes: 3 additions & 4 deletions project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ expectations:

actions:
generate_dataset_full:
run: ehrql:v1 generate-dataset analysis/dataset_definition_core.py --output output/full_dataset.csv.gz
run: ehrql:v1 generate-dataset analysis/dataset_definition_core.py --output output/full_dataset_test.csv.gz
outputs:
highly_sensitive:
full_dataset: output/full_dataset.csv.gz
full_dataset: output/full_dataset_test.csv.gz

generate_charts:
run: python:v2 python analysis/report.py
needs: [generate_dataset_full]
outputs:
moderately_sensitive:
chart1: output/adhd_events_per_year_by_sex.png
chart2: output/methylphenidate_prescriptions_per_year_by_sex.png
table1: output/results_saved.csv

0 comments on commit 59f9f00

Please sign in to comment.