forked from facebookresearch/code-prediction-transformer
-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate_new_trees.py
85 lines (70 loc) · 2.53 KB
/
generate_new_trees.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import json
import logging
import os
from utils import file_tqdm
# Configure the root logger at INFO level so the logging.info() progress
# messages emitted by this script are actually shown.
logging.basicConfig(level=logging.INFO)
def convert(ast):
    """Split each value-carrying node into a type node plus a value leaf.

    ``ast`` is a sequence of node dicts whose "children" entries are indices
    into that same sequence. Every node that has a "value" key is replaced
    by two consecutive entries: ``{"type": ..., "children": [...]}`` followed
    by ``{"value": ...}``, with the new leaf listed as the first child (only
    "type", "value" and "children" are carried over for such nodes). All
    child indices are shifted to account for the leaves inserted before them.

    The input is left untouched: nodes are copied instead of being rewritten
    in place, so converting the same ``ast`` twice gives the same result.

    Args:
        ast: Sequence of node dicts.

    Returns:
        A new list of node dicts with re-based child indices.

    Raises:
        KeyError: If a value-carrying node has no "type", or a child index
            does not refer to a node of ``ast``.
        AssertionError: If an index is listed as a child more than once in
            the converted tree.
    """
    # offset[i] = number of value leaves inserted ahead of original node i,
    # i.e. how far node i moves to the right in the output list.
    offset = {}
    inserted = 0
    for i, node in enumerate(ast):
        offset[i] = inserted
        if "value" in node:
            inserted += 1

    new_dp = []
    for i, node in enumerate(ast):
        if "value" in node:
            # The value leaf is emitted immediately after its type node,
            # hence the "+ 1" on the node's own shifted position.
            kids = [i + offset[i] + 1]
            if "children" in node:
                kids += [n + offset[n] for n in node["children"]]
            new_dp.append({"type": node["type"], "children": kids})
            new_dp.append({"value": node["value"]})
        else:
            # Shallow-copy so the caller's node is never modified; assigning
            # to an existing key keeps the original key order.
            fresh = dict(node)
            if "children" in node:
                fresh["children"] = [n + offset[n] for n in node["children"]]
            new_dp.append(fresh)

    # sanity check: no index may be claimed as a child more than once
    claimed = []
    for node in new_dp:
        if "children" in node:
            claimed += node["children"]
    assert len(claimed) == len(set(claimed)), "duplicate child index"
    return new_dp
def external(file_path, suffix):
    """Convert a file of ASTs and write the result under ``output/``.

    Reads ``file_path`` line by line, parses each line as JSON, passes it
    through ``convert`` and writes the converted tree as one JSON document
    per line to ``output/<suffix>_new_trees.json``. A file already present
    at that path is removed first.

    Args:
        file_path: Path of the input file, one JSON-encoded AST per line.
        suffix: String inserted into the output file name.
    """
    outfile = "output/{}_new_trees.json".format(suffix)
    # The destination directory is hard-coded, so create it on demand instead
    # of failing with FileNotFoundError when it does not exist yet.
    os.makedirs(os.path.dirname(outfile), exist_ok=True)
    if os.path.exists(outfile):
        os.remove(outfile)
    logging.info("Loading asts from: {}".format(file_path))
    with open(file_path, "r") as f, open(outfile, "w") as fout:
        for line in file_tqdm(f):
            dp = json.loads(line.strip())
            print(json.dumps(convert(dp)), file=fout)
    logging.info("Wrote dps to: {}".format(outfile))
def main():
    """Command-line entry point: convert a file of ASTs into new trees.

    Parses ``--input_fp``/``-i`` (required) and ``--out_fp``/``-o``
    (default ``/tmp/new_trees.json``), then reads the input line by line,
    parses each line as JSON, passes it through ``convert`` and writes the
    converted tree as one JSON document per line to the output path. A file
    already present at the output path is removed first.
    """
    parser = argparse.ArgumentParser(description="Generate datapoints from AST")
    # Required: without it the script used to delete the existing output file
    # and then crash on open(None) with an unhelpful TypeError.
    parser.add_argument(
        "--input_fp",
        "-i",
        required=True,
        help="Filepath with the ASTs to be parsed",
    )
    parser.add_argument(
        "--out_fp",
        "-o",
        default="/tmp/new_trees.json",
        help="Filepath with the output dps",
    )
    args = parser.parse_args()
    if os.path.exists(args.out_fp):
        os.remove(args.out_fp)
    logging.info("Loading asts from: {}".format(args.input_fp))
    with open(args.input_fp, "r") as f, open(args.out_fp, "w") as fout:
        for line in file_tqdm(f):
            dp = json.loads(line.strip())
            print(json.dumps(convert(dp)), file=fout)
    logging.info("Wrote dps to: {}".format(args.out_fp))
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()