-
Notifications
You must be signed in to change notification settings - Fork 4
/
fetch-openapi.py
executable file
·345 lines (287 loc) · 11.6 KB
/
fetch-openapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import sys
import requests
import json
# Command line: <output directory> <environment>.
if len(sys.argv) != 3:
    print("Need 2 arguments, output directory and environment")
    # sys.exit() is the programmatic way to stop; the bare exit() helper is
    # injected by the site module for interactive sessions and is missing
    # when Python runs without it (e.g. `python -S`).
    sys.exit(1)

output = sys.argv[1]
env = sys.argv[2]

# JSON indentation passed to json.dump(): pretty-printed for the development
# environments, compact (indent=None) for every other environment.
indent = 2 if env in ('dev', 'dev2') else None

print("--- Fetching and processing OpenAPI documentation ---")
print("Output to "+output+" for environment "+env)
print("")
print("--- Finding available webservices ---")
# A timeout stops the script from hanging forever on an unresponsive
# endpoint, and raise_for_status() turns an HTTP error into an explicit
# failure instead of an attempt to parse an error page as JSON.
response = requests.get(
    'http://ws.gbif.org/applications',
    headers={'Accept': 'application/json'},
    timeout=60,
)
response.raise_for_status()
services = response.json()

# Service name -> service URL, for the instances tagged with the requested
# environment.
urls = {}

# Find services
for service in services:
    for instance in service["instances"]:
        if instance['tags']['env'] != env:
            continue
        service_url = instance['registration']['serviceUrl']
        # Plain containment test: `find(...) > 0` would miss a match at
        # index 0.
        if 'gbif' in service_url:
            urls[service['name']] = service_url
            print("Found "+env+" "+service['name']+" at "+urls[service['name']])
print("")
def to_filename(ws):
    """Return the output file stem used for the webservice named *ws*.

    Every '-ws' occurrence is removed from the name first; a handful of
    services are then mapped to a different stem.
    """
    renamed = {
        'vectortile-server': 'v2-maps',
        'pipelines-validator': 'validator',
        'vocabulary-rest': 'vocabulary',
    }
    stem = ws.replace('-ws', '')
    return renamed.get(stem, stem)
def to_url(ws, url):
    """Return the address of the OpenAPI document below the base *url*.

    Exactly one '/' is inserted when *url* does not already end with one.
    *ws* is not used.
    """
    separator = "" if url.endswith('/') else "/"
    return url + separator + "v3/api-docs"
# Retrieve the documentation
print("--- Retrieving OpenAPI specifications ---")

# Specifications that are merged or filtered further down. They are kept in
# memory instead of being written to the output directory as fetched.
post_processed = (
    'registry-ws',
    'occurrence-ws',
    'metrics-ws',
    'geocode-ws',
    'checklistbank-ws',
    'checklistbank-nub-ws',
    'occurrence-annotation-ws',
)
fetched = {}

for ws, url in urls.items():
    docs_url = to_url(ws, url)
    print("Fetching documentation for "+ws+" from "+docs_url)
    # Without a timeout a stalled service would block the script forever.
    response = requests.get(docs_url, timeout=60)
    if response.status_code != 200:
        print("Response "+str(response.status_code)+" for "+ws+", ignoring while in dev.")
        continue

    openapi = response.json()
    if ws in post_processed:
        fetched[ws] = openapi
    else:
        # Every other service is written out unchanged.
        filename = output + '/' + to_filename(ws) + '.json'
        with open(filename, "w") as write_file:
            json.dump(openapi, write_file, separators=(',', ':'), indent=indent)
    print("Retrieved documentation for "+ws)
print("")

# The steps below read these specifications unconditionally (the annotation
# one only outside prod), so stop with a clear message when one is missing
# rather than failing later on an unbound name.
required = [
    'registry-ws',
    'occurrence-ws',
    'metrics-ws',
    'geocode-ws',
    'checklistbank-ws',
    'checklistbank-nub-ws',
]
if env != 'prod':
    required.append('occurrence-annotation-ws')
missing = [name for name in required if name not in fetched]
if missing:
    print("Missing OpenAPI documentation for "+", ".join(missing)+", cannot continue.")
    sys.exit(1)

registry = fetched.get('registry-ws')
occurrence = fetched.get('occurrence-ws')
metrics = fetched.get('metrics-ws')
geocode = fetched.get('geocode-ws')
checklistbank = fetched.get('checklistbank-ws')
checklistbanknub = fetched.get('checklistbank-nub-ws')
# Stays None in prod when the service is unavailable; it is not read there.
occurrenceannotation = fetched.get('occurrence-annotation-ws')
# Development
#response = requests.get("http://localhost:8080/v3/api-docs")
#registry = json.loads(response.text)
#response = requests.get("http://localhost:8080/v3/api-docs")
#occurrence = json.loads(response.text)
#response = requests.get("http://localhost:8080/v3/api-docs")
#checklistbank = json.loads(response.text)
# End development
# Special cases for registry and occurrence
print("--- Moving some method-paths from Registry to Occurrence ---")

# Registry paths under these prefixes are copied into the occurrence
# specification and afterwards dropped from the registry one.
movePrefixFromRegistryToOccurrence = ('/occurrence/download/',)

# Although these are @Hidden, they still end up being produced for some reason.
removePrefixFromRegistry = ('/event/download', '/occurrence/download')

# Removal is deferred so the registry paths are not modified while iterating.
toRemove = []
for registry_path, path_item in registry["paths"].items():
    if registry_path.startswith(movePrefixFromRegistryToOccurrence):
        occurrence["paths"][registry_path] = path_item
        print("Added "+registry_path+" to occurrence")
        toRemove.append(registry_path)
    if registry_path.startswith(removePrefixFromRegistry):
        toRemove.append(registry_path)

# Schemas need duplicating
registrySchemas = (
    'PagingResponseDownloadStatistics',
    'DownloadStatistics',
    'PagingResponseDatasetOccurrenceDownloadUsage',
    'DatasetOccurrenceDownloadUsage',
    #'DOI',
    'Download',
    'DownloadRequest',
    'PagingResponseDownload',
)
for schema_name in registrySchemas:
    definition = registry['components']['schemas'][schema_name]
    occurrence['components']['schemas'][schema_name] = definition

# A path can be queued more than once; only the first removal has an effect.
for stale_path in toRemove:
    if stale_path not in registry["paths"]:
        continue
    del registry["paths"][stale_path]
    print("Removed "+stale_path+" from registry")

# Preface registry description
registry_description = registry['info']['description']
full_view_preface = "**This is a view of *all Registry methods* available for advanced use. Most users of GBIF data will prefer the [Registry API — Principal methods](registry-principal-methods) page instead.**\n\n"
registry['info']['description'] = full_view_preface + registry_description
print("")
# Special cases for geocode (moving to occurrence)
print("--- Moving some method-paths from Geocode to Occurrence ---")

# Geocode paths under these prefixes are copied into the occurrence
# specification.
movePrefixFromGeocodeToOccurrence = ('/geocode/gadm/',)
for geocode_path, path_item in geocode["paths"].items():
    if geocode_path.startswith(movePrefixFromGeocodeToOccurrence):
        occurrence["paths"][geocode_path] = path_item
        print("Added "+geocode_path+" to occurrence")

# Schemas need duplicating
geocodeSchemas = ('GadmRegion', 'Region', 'Pageable', 'PagingResponseGadmRegion')
for schema_name in geocodeSchemas:
    definition = geocode['components']['schemas'][schema_name]
    occurrence['components']['schemas'][schema_name] = definition
print("")
# Special cases for metrics (moving to occurrence)
# Every metrics path is copied; of the schemas only the ones listed below.
print("--- Moving all method-paths from Metrics to Occurrence ---")
for metrics_path, path_item in metrics["paths"].items():
    occurrence["paths"][metrics_path] = path_item
    print("Added "+metrics_path+" to occurrence")

# Schemas need duplicating
metricsSchemas = ('DimensionObject', 'Rollup')
for schema_name in metricsSchemas:
    definition = metrics['components']['schemas'][schema_name]
    occurrence['components']['schemas'][schema_name] = definition
print("")
# Special cases for occurrence-annotation (moving to occurrence), but not for prod.
if env == 'prod':
    print("--- Skipping Occurrence-Annotation as this is prod ---")
else:
    print("--- Moving some method-paths from Occurrence-Annotation to Occurrence ---")

    # Annotation paths under these prefixes are copied into the occurrence
    # specification.
    movePrefixFromOccurrenceAnnotationToOccurrence = ('/occurrence/experimental/annotation/',)
    for annotation_path, path_item in occurrenceannotation["paths"].items():
        if annotation_path.startswith(movePrefixFromOccurrenceAnnotationToOccurrence):
            occurrence["paths"][annotation_path] = path_item
            print("Added "+annotation_path+" to occurrence")

    # Schemas need duplicating
    occurrenceAnnotationSchemas = ('Ruleset', 'Project', 'Rule', 'Comment')
    for schema_name in occurrenceAnnotationSchemas:
        definition = occurrenceannotation['components']['schemas'][schema_name]
        occurrence['components']['schemas'][schema_name] = definition
    print("")
# Special cases for checklistbank (moving to registry)
print("--- Moving some method-paths from Checklistbank to Registry ---")

# Checklistbank paths under these prefixes are copied into the registry
# specification and afterwards dropped from the checklistbank one.
movePrefixFromChecklistbankToRegistry = ('/dataset/{key}/metrics',)

# Removal is deferred so the checklistbank paths are not modified while
# iterating.
toRemove = []
for clb_path, path_item in checklistbank["paths"].items():
    if clb_path.startswith(movePrefixFromChecklistbankToRegistry):
        registry["paths"][clb_path] = path_item
        print("Added "+clb_path+" to registry")
        toRemove.append(clb_path)

for moved_path in toRemove:
    if moved_path not in checklistbank["paths"]:
        continue
    del checklistbank["paths"][moved_path]
    print("Removed "+moved_path+" from checklistbank")

# Schemas need duplicating
checklistbankSchemas = ('DatasetMetrics',)
for schema_name in checklistbankSchemas:
    definition = checklistbank['components']['schemas'][schema_name]
    registry['components']['schemas'][schema_name] = definition
# Special cases for checklistbank / checklistbanknub
print("--- Moving all method-paths from ChecklistbankNub to Checklistbank ---")
for nub_path, path_item in checklistbanknub["paths"].items():
    checklistbank["paths"][nub_path] = path_item
    print("Added "+nub_path+" to checklistbank")

# Schemas need duplicating
checklistbanknubSchemas = ('NameUsageMatch',)
for schema_name in checklistbanknubSchemas:
    definition = checklistbanknub['components']['schemas'][schema_name]
    checklistbank['components']['schemas'][schema_name] = definition
print("")
# Write the result of all that moving: one JSON file per merged specification.
merged_specifications = (
    ("registry", registry),
    ("occurrence", occurrence),
    ("checklistbank", checklistbank),
)
for stem, specification in merged_specifications:
    with open(output+"/"+stem+".json", "w") as write_file:
        json.dump(specification, write_file, separators=(',', ':'), indent=indent)
print("")
print("--- Filtering for basic Registry API view ---")

# Selected registry path-methods for the basic view, keyed by HTTP method.
registryBasicPath = {
    'get': [
        '/dataset',
        '/dataset/doi/{prefix}/{suffix}',
        '/dataset/metadata/{key}',
        '/dataset/metadata/{key}/document',
        '/dataset/search',
        '/dataset/search/export',
        '/dataset/{key}',
        '/derivedDataset/dataset/{doiPrefix}/{doiSuffix}',
        '/derivedDataset/dataset/{key}',
        '/derivedDataset/user/{user}',
        '/derivedDataset/{doiPrefix}/{doiSuffix}',
        '/derivedDataset/{doiPrefix}/{doiSuffix}/citation',
        '/derivedDataset/{doiPrefix}/{doiSuffix}/datasets',
        '/grscicoll/collection',
        '/grscicoll/collection/export',
        '/grscicoll/collection/{key}',
        '/grscicoll/institution',
        '/grscicoll/institution/export',
        '/grscicoll/institution/{key}',
        '/grscicoll/search',
        '/installation',
        '/installation/{key}',
        '/network',
        '/network/{key}',
        '/node',
        '/node/{key}',
        '/organization',
        '/organization/{key}',
    ],
    'post': [
        '/derivedDataset',
    ],
    'put': [
        '/derivedDataset/{doiPrefix}/{doiSuffix}',
    ],
    'delete': [],
}

# Keys of an OpenAPI Path Item Object that denote operations. A path item may
# also carry non-operation keys (e.g. 'parameters', 'summary'); those are
# left in place rather than being treated as methods.
http_methods = ('get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace')

# Path-methods to drop, keyed by method. Collected first and deleted in a
# second pass so the path items are not modified while being iterated.
complicated = {}
# Names of the tags referenced by at least one retained operation.
tags_to_keep = set()

for path, path_item in registry["paths"].items():
    for method in path_item:
        if method not in http_methods:
            continue
        # .get() so that methods without a selection (e.g. 'patch') are
        # excluded instead of raising KeyError.
        if path in registryBasicPath.get(method, ()):
            print("Including "+method+" "+path+" from Registry principal methods view.")
            # An operation is not required to declare tags.
            tags_to_keep.update(path_item[method].get('tags', []))
        else:
            print("Excluding "+method+" "+path+" from Registry principal methods view.")
            complicated.setdefault(method, []).append(path)
        # Alternative way using code annotations (an "x-Category" extension on
        # the operation), but see https://github.com/swagger-api/swagger-core/issues/3249

# Delete unwanted path-methods
for method, excluded_paths in complicated.items():
    for path in excluded_paths:
        registry["paths"].get(path, {}).pop(method, None)

# Delete unneeded tags
registry["tags"] = [tag for tag in registry.get("tags", []) if tag['name'] in tags_to_keep]

# Add heading
registry['info']['title'] = registry['info']['title'] + " — Principal methods only"
registry['info']['description'] = "**This is a view of *principal methods only*, sufficient for most users of GBIF data.** Data publishers with write access to the GBIF Registry should refer to the [full Registry API documentation](registry).\n\n" + registry_description

with open(output+"/registry-principal-methods.json", "w") as write_file:
    json.dump(registry, write_file, separators=(',', ':'), indent=indent)
print("")
print("=== OpenAPI specification generation completed ===")