Skip to content

Commit

Permalink
Merge pull request #6 from pvanheus/make_data
Browse files Browse the repository at this point in the history
Add start of Linux lessons
  • Loading branch information
pvanheus authored Mar 4, 2024
2 parents aa9fdf4 + 2073d81 commit c2bdf51
Show file tree
Hide file tree
Showing 31 changed files with 20,412 additions and 61 deletions.
1,001 changes: 1,001 additions & 0 deletions data/shell/cases.csv

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions data/shell/file_list1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
SRR8364252,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_1.fastq.gz
SRR8364252,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_2.fastq.gz
SRR8364253,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_1.fastq.gz
SRR8364253,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_2.fastq.gz
SRR8364254,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_1.fastq.gz
SRR8364254,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_2.fastq.gz
SRR8364255,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_1.fastq.gz
SRR8364255,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_2.fastq.gz
SRR8364256,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_1.fastq.gz
SRR8364256,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_2.fastq.gz
SRR8364257,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_1.fastq.gz
SRR8364257,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_2.fastq.gz
SRR8364258,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_1.fastq.gz
SRR8364258,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_2.fastq.gz
SRR8364259,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_1.fastq.gz
SRR8364259,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_2.fastq.gz
SRR8364260,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_1.fastq.gz
SRR8364260,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_2.fastq.gz
SRR8364261,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_1.fastq.gz
SRR8364261,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_2.fastq.gz
20 changes: 20 additions & 0 deletions data/shell/file_list2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
SRR8364252,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_2.fastq.gz
SRR8364252,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364252_2.fastq.gz
SRR8364253,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_2.fastq.gz
SRR8364253,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364253_2.fastq.gz
SRR8364254,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_2.fastq.gz
SRR8364254,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364254_2.fastq.gz
SRR8364255,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_2.fastq.gz
SRR8364255,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364255_2.fastq.gz
SRR8364256,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_2.fastq.gz
SRR8364256,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364256_2.fastq.gz
SRR8364257,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_2.fastq.gz
SRR8364257,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364257_2.fastq.gz
SRR8364258,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_2.fastq.gz
SRR8364258,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364258_2.fastq.gz
SRR8364259,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_2.fastq.gz
SRR8364259,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364259_2.fastq.gz
SRR8364260,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_2.fastq.gz
SRR8364260,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364260_2.fastq.gz
SRR8364261,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_2.fastq.gz
SRR8364261,https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_1.fastq.gz;https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads/SRR8364261_2.fastq.gz
52 changes: 52 additions & 0 deletions data/shell/make_case_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env python3

from datetime import date
from random import random

# departments is a list of the Departments of Haiti
departments = ["Artibonite", "Centre", "Grande'Anse", "Nippes", "Nord", "Nord-Est", "Nord-Ouest", "Sud", "Sud-Est", "Ouest"]

def choose_location():
    """Pick one department of Haiti at random, using fixed weights.

    A single uniform draw in [0, 1) is compared against ascending cumulative
    thresholds; the first threshold that exceeds the draw selects the
    department. Drawing only once is what makes the thresholds behave as a
    cumulative distribution (re-drawing per threshold skews the weights
    heavily towards the first entries).

    Returns:
        str: the chosen department name. Never None, because the final
        threshold is 1.00 and random() is always strictly below 1.0.
    """
    # Keys are cumulative upper bounds and must stay in ascending order; the
    # loop below relies on dict insertion order. The trailing comment on each
    # entry is that department's resulting share of cases.
    case_by_department = {
        0.10: "Artibonite",   # 10%
        0.20: "Centre",       # 10%
        0.25: "Grande'Anse",  # 5%
        0.35: "Nippes",       # 10%
        0.45: "Nord",         # 10%
        0.55: "Nord-Est",     # 10%
        0.80: "Ouest",        # 25%
        0.85: "Sud",          # 5%
        0.90: "Sud-Est",      # 5%
        1.00: "Nord-Ouest",   # 10% (last bucket closes the range at 1.0)
    }

    draw = random()
    for threshold, department in case_by_department.items():
        if draw < threshold:
            return department


def make_case_data(num_cases=1000, min_date=date(2015, 1, 1), max_date=date(2015, 2, 28)):
    """Print a synthetic case line list as CSV on standard output.

    The first line is the header
    ``case_id,department,case_date,gender,age,age_unit``; it is followed by
    ``num_cases`` randomly generated rows.

    Args:
        num_cases: number of case rows to emit.
        min_date: earliest possible case date (inclusive).
        max_date: latest possible case date (inclusive).

    Each row contains:
        case_id   -- ``CAS`` followed by the 1-based row number, zero-padded
                     to five digits.
        department -- value returned by choose_location().
        case_date -- a date between min_date and max_date, ISO formatted.
        gender    -- 'M' with probability 0.4, otherwise 'F'.
        age, age_unit -- 20% of rows: 0-11 'months'; 30%: 1-15 'years';
                     50%: 16-50 'years'.
    """
    print("case_id,department,case_date,gender,age,age_unit")

    first_day = min_date.toordinal()
    # +1 so that max_date itself can be drawn; without it the upper bound is
    # silently exclusive and the last day never appears in the output.
    num_days = max_date.toordinal() - first_day + 1

    for i in range(num_cases):
        case_id = f'CAS{i+1:05d}'
        location = choose_location()
        gender = 'M' if random() < 0.4 else 'F'

        age_choice = random()
        if age_choice < 0.2:
            # infants: age expressed in whole months, 0-11
            age = int(random() * 12)
            age_unit = 'months'
        elif age_choice < 0.5:
            # children: 1-15 years
            age = int(random() * 15) + 1
            age_unit = 'years'
        else:
            # older cases: 16-50 years
            age = int(random() * 35) + 16
            age_unit = 'years'

        case_date = date.fromordinal(first_day + int(random() * num_days))
        print(f"{case_id},{location},{case_date},{gender},{age},{age_unit}")

# When executed as a script, write the default 1000-case CSV to standard output.
if __name__ == '__main__':
    make_case_data()



# sample id / lat / long / ISO location code / strain / gender / age
8 changes: 8 additions & 0 deletions data/shell/make_file_list1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Print one CSV row per read file: <sample>,<download URL of that file>.
#
# Usage: make_file_list1.sh [reads_dir]
#   reads_dir  directory holding the read files (default: reads)
#
# The sample name is the file name with a trailing _1.fastq.gz or
# _2.fastq.gz removed; other file names are used unchanged.

set -euo pipefail

url=https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads

#######################################
# List every file in a reads directory as "sample,url/file".
# Globals:   url (read)
# Arguments: $1 - reads directory (optional, default "reads")
# Outputs:   CSV rows on stdout; nothing if the directory is empty/missing
#######################################
list_reads() {
  local reads_dir=${1:-reads}
  local path file sample

  # Glob instead of parsing `ls`, so names with spaces or glob characters
  # survive intact.
  for path in "${reads_dir}"/*; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$path" ]] || continue
    file=${path##*/}
    sample=${file%_[12].fastq.gz}
    printf '%s,%s/%s\n' "$sample" "$url" "$file"
  done
}

list_reads "$@"
8 changes: 8 additions & 0 deletions data/shell/make_file_list2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Print one CSV row per paired-end sample:
#   <sample>,<URL of _1.fastq.gz>;<URL of _2.fastq.gz>
#
# Usage: make_file_list2.sh [reads_dir]
#   reads_dir  directory holding the read files (default: reads)

set -euo pipefail

url=https://pathogen-genomics-march-2024.sanbi.ac.za/data/shell/reads

#######################################
# List each sample in a reads directory once, with both mate URLs.
# Globals:   url (read)
# Arguments: $1 - reads directory (optional, default "reads")
# Outputs:   CSV rows on stdout; nothing if no *_1.fastq.gz file exists
#######################################
list_pairs() {
  local reads_dir=${1:-reads}
  local path file sample

  # Iterate over the forward reads only. Looping over every file would visit
  # both mates of a pair and emit each sample twice.
  for path in "${reads_dir}"/*_1.fastq.gz; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$path" ]] || continue
    file=${path##*/}
    sample=${file%_1.fastq.gz}
    printf '%s,%s/%s_1.fastq.gz;%s/%s_2.fastq.gz\n' \
      "$sample" "$url" "$sample" "$url" "$sample"
  done
}

list_pairs "$@"
Loading

0 comments on commit c2bdf51

Please sign in to comment.