-
Notifications
You must be signed in to change notification settings - Fork 4
/
create_column.py
73 lines (56 loc) · 2.88 KB
/
create_column.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
"""
Modified on Mon Apr 6 13:35:10 2020
@author: gagrawa3
"""
import json
import json_object_parser as parser
import sys
# Import payload accumulators: the finished chunks, and the chunk that is
# currently being filled.
list_json_obj, json_obj = [], []
# Registries of the communities and domains seen so far.
community_list, domain_list = [], []
# Column assets are collected in a set so identical entries collapse into one.
# asset_list is only a module-level placeholder: ingest_column binds a local
# of the same name and never writes to this one.
asset_set, asset_list = set(), []
def ingest_column(communityName, domainName, schemaList, tableList, colList, attrList, session, parentCommunityName=None):
    """Build JSON import templates for column assets and upload them.

    Registers the community and its 'Physical Data Dictionary' domain, adds
    one 'Column' asset per (table, column) pair, writes the resulting objects
    to ``column_template_<n>.json`` files in the current working directory
    and posts each file to the Collibra ``import/json-job`` endpoint.

    Args:
        communityName: Name of the community that owns the domain.
        domainName: Name of the domain the columns are added to.
        schemaList: Schema name per column, or None when tables are not
            schema-qualified.
        tableList: Table name per column.
        colList: Column name per column.
        attrList: Attribute payload per column. Each entry is stored inside
            a set element, so it has to be hashable.
        session: Object exposing ``post(url, files=..., data=...)``;
            presumably a ``requests.Session`` -- confirm against the caller.
        parentCommunityName: Optional parent passed to the community builder.

    The per-column lists are combined with ``zip``, so extra entries in the
    longer lists are silently ignored.

    NOTE(review): the module-level accumulators (community_list, domain_list,
    asset_set, json_obj, list_json_obj) are never cleared, so a second call
    in the same process emits and uploads the earlier entries again. Confirm
    whether that is intended.
    """
    global json_obj

    community_list.append(communityName)
    domain_list.append((communityName, domainName, 'Physical Data Dictionary'))

    # Relation format: (relation_type_id:relation_direction, relation_asset_name)
    relation_type = '00000000-0000-0000-0000-000000007042:TARGET'

    # Identity test: `!= None` would invoke a custom __ne__ (and fail on
    # array-like inputs) instead of simply checking for absence.
    if schemaList is not None:
        for schemaName, tableName, columnName, columnAttr in zip(schemaList, tableList, colList, attrList):
            table_full_name = schemaName + '.' + tableName
            relations = (relation_type, table_full_name)
            asset_set.add((communityName, domainName, table_full_name + '.' + columnName, 'Column', columnName, columnAttr, relations))
    else:
        for tableName, columnName, columnAttr in zip(tableList, colList, attrList):
            relations = (relation_type, tableName)
            asset_set.add((communityName, domainName, tableName + '.' + columnName, 'Column', columnName, columnAttr, relations))

    for community in community_list:
        json_obj.append(parser.getCommunityObj(community, parentCommunityName))

    for domain_community, domain_name, domain_type in domain_list:
        json_obj.append(parser.getDomainObj(domain_community, domain_name, domain_type))

    # Start a new chunk once the current list grows past this threshold.
    # sys.getsizeof is shallow: it measures the list's own storage, not the
    # objects it references, so this effectively caps the element count per
    # file rather than the exact file size.
    max_shallow_bytes = 400000
    for asset_community, asset_domain, asset_name, asset_type, column_name, column_attr, relation in asset_set:
        if sys.getsizeof(json_obj) > max_shallow_bytes:
            list_json_obj.append(json_obj)
            json_obj = []
        json_obj.append(parser.getAssetObj(asset_community, asset_domain, asset_name, asset_type, column_name, column_attr, relation))

    list_json_obj.append(json_obj)

    # Development endpoint; production is
    # https://asu.collibra.com/rest/2.0/import/json-job
    url = 'https://asu-dev.collibra.com/rest/2.0/import/json-job'
    payload = {'sendNotification': 'true'}

    # A separate loop name keeps the global json_obj pointing at the last
    # chunk instead of being rebound on every iteration.
    for index, chunk in enumerate(list_json_obj, 1):
        template_name = f"column_template_{index}.json"
        with open(template_name, "w") as write_file:
            json.dump(chunk, write_file)

        print(f"Ingesting data from template file: {template_name}")

        # Context manager so the upload handle is closed after each request.
        with open(template_name, 'rb') as upload_file:
            response = session.post(url, files={'file': upload_file}, data=payload)

        # Truthiness of the response decides which representation to print.
        if response:
            print(response.json())
        else:
            print(response.text)