forked from alan-turing-institute/uatk-spc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
synthpop.proto
434 lines (388 loc) · 14.1 KB
/
synthpop.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
syntax = "proto2";
package synthpop;
// Root message: the households, people, venues and supporting data that make
// up one population.
message Population {
  // Household.id is an index into this list.
  repeated Household households = 1;
  // Person.id is an index into this list.
  repeated Person people = 2;

  // One entry per Activity enum value (the map key), except that
  // `Activity::Home` has no entry here; use `households` for that.
  map<int32, VenueList> venues_per_activity = 3;

  // The key is the MSOA11CD.
  map<string, InfoPerMSOA> info_per_msoa = 4;

  required Lockdown lockdown = 5;

  // Referenced from Person.weekday_diaries and Person.weekend_diaries.
  repeated TimeUseDiary time_use_diaries = 6;

  // Which year this population represents.
  required uint32 year = 7;
}
// One household: its identifier, area codes, members and details.
message Household {
  // Index of this household within Population::households.
  required uint64 id = 1;

  // MSOA code, 2011 edition.
  required string msoa11cd = 2;
  // OA code, 2011 edition.
  required string oa11cd = 3;

  // Each entry is an index into `Population::people`.
  repeated uint64 members = 4;

  required HouseholdDetails details = 5;
}
// Descriptive attributes of a household; embedded in Household.details.
message HouseholdDetails {
  // Unique household ID.
  required string hid = 1;

  // National Statistics Socio-economic classification of the household's
  // reference person (LC4605, census 2011).
  optional Nssec8 nssec8 = 2;

  // Accommodation type (based on LC4402EW, census 2011).
  optional AccommodationType accommodation_type = 3;

  // Communal establishment type (based on QS420, census 2011).
  optional CommunalType communal_type = 4;

  // Room count of the accommodation, capped at 6 (LC4404EW, census 2011).
  optional uint64 num_rooms = 5;

  // Whether central heating is present (based on LC4402EW, census 2011).
  required bool central_heat = 6;

  // Tenure (based on LC4402EW, census 2011).
  optional Tenure tenure = 7;

  // Car count, capped at 3 (LC4202EW, census 2011).
  optional uint64 num_cars = 8;
}
// Wrapper around a list of venues; this is the value type of
// Population.venues_per_activity.
message VenueList {
  repeated Venue venues = 1;
}
// Data held for a single MSOA; this is the value type of
// Population.info_per_msoa.
message InfoPerMSOA {
  // NOTE(review): presumably the outline of the MSOA as a sequence of points;
  // confirm against the producer.
  repeated Point shape = 1;

  // This number has no relationship to the people listed.
  required uint64 population = 2;

  // Centroid of every building inside this MSOA. The list may be empty.
  repeated Point buildings = 3;

  repeated Flows flows_per_activity = 4;
}
// A longitude/latitude pair, expressed in WGS84.
message Point {
  required float longitude = 1;
  required float latitude = 2;
}
// One individual, with links to their household, workplace and time-use
// diaries.
message Person {
  // Index of this person within Population::people.
  required uint64 id = 1;

  // Index of this person's household within Population::households.
  required uint64 household = 2;

  // Index into venues_per_activity[Activity::WORK].
  optional uint64 workplace = 3;

  required Identifiers identifiers = 4;
  required Demographics demographics = 5;
  required Employment employment = 6;
  required Health health = 7;
  required Events events = 8;

  // Time use differs from one day to the next, so a person may reference
  // several diaries. The values are keys into the Population-wide
  // time_use_diaries.
  // NOTE(review): time_use_diaries is a repeated field, so these are
  // presumably list indices; confirm.
  repeated uint32 weekday_diaries = 9;
  repeated uint32 weekend_diaries = 10;
}
// Identifiers that tie a person back to the source surveys.
message Identifiers {
  // Unique person ID.
  required string orig_pid = 1;

  // Household identifier linking to the UK Time Use Survey 2015.
  required int64 id_tus_hh = 2;

  // Person identifier linking to the UK Time Use Survey 2015.
  required int64 id_tus_p = 3;

  // Identifier linking to the Health Surveys of Great Britain (one survey per
  // country).
  required int64 pid_hs = 4;
}
// Demographic attributes of a person.
message Demographics {
  required Sex sex = 1;

  // Age in years. The source data limits this to a maximum of 86, so higher
  // ages are clamped to 86.
  required uint32 age_years = 2;

  required Ethnicity ethnicity = 3;
  optional Nssec8 nssec8 = 4;
}
// Employment classification and salary of a person.
message Employment {
  // First layer of the Standard Industrial Classification of Economic
  // Activities 2007 for where this person works: one capitalized character
  // such as "A" or "B". See
  // https://www.ons.gov.uk/methodology/classificationsandstandards/ukstandardindustrialclassificationofeconomicactivities.
  optional string sic1d2007 = 1;

  // Second layer of the Standard Industrial Classification of Economic
  // Activities 2007.
  optional uint64 sic2d2007 = 2;

  // Previous version of the Standard Occupational Classification
  // (https://www.ons.gov.uk/methodology/classificationsandstandards/standardoccupationalclassificationsoc/soc2010).
  optional uint64 soc2010 = 3;

  required PwkStat pwkstat = 4;

  // Salary per year, in GBP. This may be 0 while salary_hourly is nonzero;
  // that happens when TimeUse is 0 for work.
  optional float salary_yearly = 5;

  // Salary per hour, in GBP.
  optional float salary_hourly = 6;
}
// Sex, as used by Demographics.sex and TimeUseDiary.sex.
enum Sex {
  MALE = 1;
  FEMALE = 2;
}
// Ethnicity, as used by Demographics.ethnicity.
enum Ethnicity {
  WHITE = 1;
  BLACK = 2;
  ASIAN = 3;
  MIXED = 4;
  OTHER = 5;
}
// Accommodation type, as used by HouseholdDetails.accommodation_type.
enum AccommodationType {
  DETACHED = 1;
  SEMI_DETACHED = 2;
  TERRACED = 3;
  FLAT = 4;
}
// Communal establishment type, as used by HouseholdDetails.communal_type.
enum CommunalType {
  COMMUNAL = 0;

  MEDICAL = 1;
  MEDICAL_NHS = 2;
  MEDICAL_NHS_HOSPITAL = 3;
  MEDICAL_NHS_MENTAL = 4;
  MEDICAL_NHS_OTHER = 5;
  MEDICAL_LA = 6;
  MEDICAL_LA_CHILDREN = 7;
  MEDICAL_LA_CARE_HOME_NURSING = 8;
  MEDICAL_LA_CARE_HOME_NO_NURSING = 9;
  MEDICAL_LA_OTHER = 10;
  MEDICAL_SOCIAL = 11;
  MEDICAL_SOCIAL_HOSTEL = 12;
  MEDICAL_SOCIAL_SHELTER = 13;
  MEDICAL_OTHER = 14;
  MEDICAL_OTHER_CARE_HOME_NURSING = 15;
  MEDICAL_OTHER_CARE_HOME_NO_NURSING = 16;
  MEDICAL_OTHER_CHILDREN = 17;
  MEDICAL_OTHER_MENTAL = 18;
  MEDICAL_OTHER_HOSPITAL = 19;
  MEDICAL_OTHER_OTHER = 20;

  COM_OTHER = 21;
  DEFENSE = 22;
  PRISON = 23;
  PROBATION = 24;
  DETENTION = 25;
  EDUCATION = 26;
  HOTEL = 27;
  HOSTEL = 28;
  HOLIDAY = 29;
  TRAVEL = 30;
  RELIGIOUS = 31;
  STAFF = 32;
  OTHER_OTHER = 33;
  NOT_STATED = 34;
}
// Tenure of a household, as used by HouseholdDetails.tenure.
enum Tenure {
  OWNED_FULLY = 1;
  OWNED_MORTGAGE = 2;
  RENTED_FREE = 3;
  RENTED_SOCIAL = 4;
  RENTED_PRIVATE = 5;
}
// The National Statistics socio-economic classification.
enum Nssec8 {
  // Higher managerial, administrative and professional occupations.
  HIGHER = 1;

  // Lower managerial, administrative and professional occupations.
  LOWER = 2;

  // Intermediate occupations.
  INTERMEDIATE = 3;

  // Small employers and own account workers.
  SMALL = 4;

  // Lower supervisory and technical occupations.
  SUPER = 5;

  // Semi-routine occupations.
  SEMIROUTINE = 6;

  // Routine occupations.
  ROUTINE = 7;

  // Never worked and long-term unemployed.
  NEVER = 8;
}
// Professional working status, taken from the time-use survey.
enum PwkStat {
  // Not applicable (age < 16).
  NA = 0;

  // Employee, full-time.
  EMPLOYEE_FT = 1;

  // Employee, part-time.
  EMPLOYEE_PT = 2;

  // Employee, unspecified.
  EMPLOYEE_UNSPEC = 3;

  // Self-employed.
  SELF_EMPLOYED = 4;

  // Unemployed. The PWK_ prefix is there because of protobuf scoping rules.
  PWK_UNEMPLOYED = 5;

  // Retired.
  RETIRED = 6;

  // Homemaker / maternity leave.
  HOMEMAKER = 7;

  // Student, full-time.
  STUDENT_FT = 8;

  // Long-term sick / disability.
  LONG_TERM_SICK = 9;

  // Other. The PWK_ prefix is there because of protobuf scoping rules (a
  // plain OTHER is already declared by Ethnicity in this package).
  PWK_OTHER = 10;
}
// Health attributes of a person. Several definitions differ between England,
// Scotland and Wales, as noted on each field.
message Health {
  // Derived from the Health Survey for England 2019.
  optional float bmi = 1;

  // England: cardiovascular medication taken in the last 7 days.
  // Scotland: had a cardiovascular condition, excluding diabetes / blood
  // pressure.
  // Wales: any heart condition, excluding high blood pressure.
  required bool has_cardiovascular_disease = 2;

  // Scotland and England: has doctor-diagnosed diabetes.
  // Wales: diabetes currently treated.
  required bool has_diabetes = 3;

  // Scotland and England: doctor-diagnosed high blood pressure.
  // Wales: high blood pressure currently treated.
  required bool has_high_blood_pressure = 4;

  // Number of prescribed medications. Capped at 8 for England, at 18 for
  // Scotland and at 1 for Wales.
  optional uint64 number_medications = 5;

  // Self-assessed general health.
  optional SelfAssessedHealth self_assessed_health = 6;

  // How satisfied the person is with life nowadays.
  optional LifeSatisfaction life_satisfaction = 7;
}
// General health as assessed by the person themselves.
enum SelfAssessedHealth {
  // For Welsh adults this value also covers EXCELLENT.
  VERY_GOOD = 1;
  GOOD = 2;
  FAIR = 3;
  // For Welsh adults this value also covers VERY_BAD.
  BAD = 4;
  VERY_BAD = 5;
}
// Level of life satisfaction, as used by Health.life_satisfaction.
enum LifeSatisfaction {
  LOW = 1;
  MEDIUM = 2;
  HIGH = 3;
  VERY_HIGH = 4;
}
// Relative probability weights for a person attending each kind of event.
message Events {
  // Weight for attending a sport fixture.
  required float sport = 1;

  // Weight for attending a Rugby fixture.
  required float rugby = 2;

  // Weight for attending a concert primarily targeting young males.
  required float concert_m = 3;

  // Weight for attending a concert primarily targeting young females.
  required float concert_f = 4;

  // Weight for attending a concert primarily targeting middle-aged males.
  required float concert_ms = 5;

  // Weight for attending a concert primarily targeting middle-aged females.
  required float concert_fs = 6;

  // Weight for visiting a museum.
  required float museum = 7;
}
// For one activity, the venues that anybody living in a given MSOA is likely
// to go to in order to do that activity.
message Flows {
  // HOME and WORK are never represented here, because those vary per person.
  required Activity activity = 1;

  repeated Flow flows = 2;
}
// A single venue and its probability; an element of Flows.flows.
message Flow {
  required uint64 venue_id = 1;

  // A probability. Summed over all entries of `flows`, the weights equal 1.
  required double weight = 2;
}
// A place where people carry out an activity.
message Venue {
  // Not unique on its own: the same ID is reused across activities, so a
  // lookup in venues_per_activity needs both this id and an Activity.
  required uint64 id = 1;

  required Activity activity = 2;
  required Point location = 3;

  // Only present for PrimarySchool and SecondarySchool venues. This is a
  // https://en.wikipedia.org/wiki/Unique_Reference_Number.
  optional string urn = 4;
}
// Kinds of activity. The numeric values serve as the keys of
// Population.venues_per_activity.
enum Activity {
  RETAIL = 0;
  PRIMARY_SCHOOL = 1;
  SECONDARY_SCHOOL = 2;
  HOME = 3;
  WORK = 4;
}
// The reduction in daily travel behavior caused by COVID lockdowns.
message Lockdown {
  // Date of the first entry of change_per_day, formatted as YYYY-MM-DD.
  required string start_date = 1;

  // Daily factors by which regular mobility was reduced; the first entry
  // corresponds to start_date. A value of 1.0 means no change from normal. A
  // value greater than 1 means some percent of regular mobility was reduced.
  // NOTE(review): a factor above 1 denoting a reduction reads oddly; confirm
  // the intended direction against the code that produces this list.
  repeated float change_per_day = 2;
}
// One diary describing how a day was split between activities and transport
// modes, together with characteristics of the person who filled it out.
message TimeUseDiary {
  // Unique identifier of a diary entry (derived from TUS 15,
  // serial_pnum_weekday).
  required string uid = 1;

  // Working day or weekend day (derived from uktus15_dv_time_vars).
  required bool weekday = 2;

  // TODO Enum
  // Type of day (KindOfDay in uktus15_dv_time_vars):
  //    1  A workday
  //    2  A school or college day
  //    3  A day off due to the weekend/holiday or work schedule
  //    4  A sick leave day
  //    5  A vacation/annual leave day
  //    6  On leave for other reasons
  //    7  None of the above
  //   -9  No answer/refused
  //   -7  Interview not achieved
  //   -2  Schedule not applicable
  required int32 day_type = 3;

  // Month of the year in the range [1, 12] (dmonth in uktus15_dv_time_vars).
  required uint32 month = 4;

  // ---- Time at home ----

  // Proportion of the day spent working at home (derived from dml1_1 in
  // uktus15_dv_time_vars and uktus15_diary_ep_long).
  required float pworkhome = 5;

  // Proportion of the day spent on other activities at home (derived from
  // the sum of dml1_0, dml1_3, dml1_7 and dml1_8 in uktus15_dv_time_vars).
  required float phomeother = 6;

  // ---- Time away from home ----

  // Proportion of the day spent working away from home (derived from dml1_1
  // in uktus15_dv_time_vars and uktus15_diary_ep_long).
  required float pwork = 7;

  // Proportion of the day spent at school (derived from dml2_21 in
  // uktus15_dv_time_vars).
  required float pschool = 8;

  // Proportion of the day spent shopping (derived from dml3_361 in
  // uktus15_dv_time_vars).
  required float pshop = 9;

  // Proportion of the day spent at commercial or personal services (derived
  // from the sum of dml3_362 and dml3_363 in uktus15_dv_time_vars).
  required float pservices = 10;

  // Proportion of the day spent on outdoor leisure activities (derived from
  // the sum of dml1_4, dml1_5 and dml1_6 in uktus15_dv_time_vars).
  required float pleisure = 11;

  // Proportion of the day spent escorting someone else (derived from
  // dml3_923 in uktus15_dv_time_vars).
  required float pescort = 12;

  // Proportion of the day spent travelling (derived from dml1_9a in
  // uktus15_dv_time_vars).
  required float ptransport = 13;

  // ---- Totals ----

  // Total proportion of the day spent on activities at home (formula =
  // pworkhome + phomeother).
  required float phome_total = 14;

  // Total proportion of the day spent outdoors (formula = pwork + pschool +
  // pshop + pservices + pleisure + pescort + ptransport).
  required float pnothome_total = 15;

  // Total remaining proportion of the day (formula = 1 - (phome_total +
  // pnothome_total)); if negative, it is removed from phomeother.
  required float punknown_total = 16;

  // ---- Transport ----

  // Proportion of travel done walking (derived from uktus15_diary_ep_long).
  required float pmwalk = 17;

  // Proportion of travel done cycling (derived from uktus15_diary_ep_long).
  required float pmcycle = 18;

  // Proportion of travel done by a private means of transport, which
  // includes walking and cycling (derived from uktus15_diary_ep_long).
  required float pmprivate = 19;

  // Proportion of travel done by a public means of transport (derived from
  // uktus15_diary_ep_long).
  required float pmpublic = 20;

  // Proportion of travel done by an unknown means. Note that pmunknown +
  // pmpublic + pmprivate sum to 1, or to 0 if no travel is made (derived
  // from uktus15_diary_ep_long).
  required float pmunknown = 21;

  // ---- Characteristics of the person who filled out this diary ----

  required Sex sex = 22;

  // An age bucket as defined by the ONS.
  required uint32 age35g = 23;

  optional Nssec8 nssec8 = 24;
  required PwkStat pwkstat = 25;
}