-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDataset_construct.py
216 lines (197 loc) · 5.85 KB
/
Dataset_construct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import os
import csv
# Directory holding one plain-text file per chapter of recipes.
recipe_txt_path = os.path.join(os.getcwd(), 'YinShanZhengYao_text/recipe_chapters')
# Header row written at the top of every generated CSV file.
fields = ['Food_Name', 'Effect', 'Ingredients', 'Steps']
# Maps chapter name (input file name without extension) -> list of CSV rows.
data = {}
# The single chapter file to process; set to None to process every .txt
# file found in ``recipe_txt_path``.
file_specific = 'JuanDiYiJuZhenYiZhuan_3.txt'

# Number of consecutive text lines that make up one recipe unless a title
# listed in SEQUENCE_OVERRIDES says otherwise.
DEFAULT_SEQUENCE = 4

# The recipe texts and the title literals below are Chinese; name the
# encoding explicitly instead of relying on the platform default.
# NOTE(review): assumes the chapter files are stored as UTF-8 -- confirm.
TEXT_ENCODING = 'utf-8'

# Per-file table: recipe title -> number of lines per recipe from that title
# onwards.  The new length stays in force until the next listed title is
# met, so only the titles where the length *changes* need to be listed.
SEQUENCE_OVERRIDES = {
    'JuanDiErShenXianFuShi.txt': {
        '服天门冬': 3,
        '服地黄': 2,
        '神枕法': 5,
        '服菖蒲': 3,
        '服胡麻': 2,
        '服莲子莲蕊': 3,
        '服何首乌': 2,
    },
    'JuanDiErShiLiaoZhuBing.txt': {
        '羊肉羹': 5,
        '鹿蹄汤': 4,
        '牛肉脯': 5,
        '莲子粥': 4,
        '牛奶子煎荜拨法': 2,
        '肉羹': 4,
        '羊肚羹': 5,
        '葛粉羹': 4,
        '恶实菜': 3,
        '乌驴皮汤': 4,
    },
    'JuanDiErZhuBanTangJian.txt': {
        '荔枝膏': 5,
        '五味子汤': 4,
        '橘皮醒酲汤': 5,
        '渴忒饼儿': 4,
        '牛髓膏子': 5,
        '木瓜煎': 3,
        '酥油': 2,
        '清茶': 2,
        '香茶': 3,
    },
    'JuanDiYiJuZhenYiZhuan_1.txt': {
        '围像': 5,
        '春盘面': 3,
        '皂羹面': 4,
        '水龙子': 5,
        '马乞': 4,
        '攒鸡儿': 3,
        '鱼弹儿': 4,
        '派饼儿': 2,
        '盐肠': 3,
        '脑瓦剌': 2,
    },
    'JuanDiYiJuZhenYiZhuan_2.txt': {
        '姜黄鱼': 3,
        '猪头姜豉': 4,
        '蒲黄瓜齑': 3,
        '攒牛蹄': 4,
        '细乞思哥': 3,
        '炸䐑儿': 4,
        '熬蹄儿': 3,
        '烧雁': 4,
        '烧水札': 3,
        '鹿奶肪馒头': 4,
        '茄子馒头': 3,
        '天花包子': 4,
        '荷莲兜子': 3,
    },
}

# Titles that additionally push the row collected so far (even if it is
# incomplete) into the output before their own row is started.
FORCE_FLUSH_TITLES = {
    'JuanDiErZhuBanTangJian.txt': frozenset({'清茶'}),
}


def group_recipe_lines(lines, file_name):
    """Group the text lines of one chapter into per-recipe rows.

    Args:
        lines: Iterable of raw text lines (an open file or a list of
            strings).  Surrounding whitespace is stripped from each line.
        file_name: Name of the chapter file; selects the entries of
            ``SEQUENCE_OVERRIDES`` / ``FORCE_FLUSH_TITLES`` that apply.

    Returns:
        A list of rows, each row being the list of stripped lines that
        belong to one recipe.
    """
    overrides = SEQUENCE_OVERRIDES.get(file_name, {})
    flush_titles = FORCE_FLUSH_TITLES.get(file_name, ())

    rows = []
    single_row = []
    count = 0
    sequence = DEFAULT_SEQUENCE
    for raw_line in lines:
        count += 1
        line = raw_line.strip()
        if line in overrides:
            # A listed title always starts a fresh count and switches the
            # recipe length for everything that follows.
            count = 1
            sequence = overrides[line]
            if line in flush_titles:
                rows.append(single_row)
                single_row = []
        single_row.append(line)
        if count % sequence == 0:
            rows.append(single_row)
            single_row = []
            count = 0
    # NOTE: lines left in ``single_row`` here (an incomplete final recipe)
    # are not emitted.
    return rows


def build_dataset(recipe_dir, only_file=None):
    """Read chapter files and return ``{chapter_name: rows}``.

    Args:
        recipe_dir: Directory containing the chapter text files.
        only_file: If given, process just this file name; otherwise every
            ``.txt`` file in ``recipe_dir`` is processed in sorted order.

    Returns:
        Dict mapping the file name up to its first ``.`` to the rows
        produced by ``group_recipe_lines``.

    Raises:
        OSError: If the directory or a chapter file cannot be read.
    """
    if only_file is not None:
        file_names = [only_file]
    else:
        file_names = sorted(
            name for name in os.listdir(recipe_dir) if name.endswith('.txt')
        )

    dataset = {}
    for file_name in file_names:
        file_path = os.path.join(recipe_dir, file_name)
        print('file_path:', file_path)
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(file_path, 'r', encoding=TEXT_ENCODING) as file_txt:
            rows = group_recipe_lines(file_txt, file_name)
        dataset[file_name.split('.')[0]] = rows
    return dataset


def write_csv_files(dataset, output_dir='first_clean_recipe'):
    """Write one ``v1_<chapter>.csv`` file per entry of ``dataset``.

    Args:
        dataset: Dict mapping chapter name to a list of rows.
        output_dir: Directory receiving the CSV files; created if missing.
    """
    if not dataset:
        return
    os.makedirs(output_dir, exist_ok=True)
    for name, rows in dataset.items():
        csv_filename = os.path.join(output_dir, 'v1_' + name + ".csv")
        print('csv_filename:', csv_filename)
        # newline='' is what the csv module expects; without it extra blank
        # lines appear on platforms that translate '\n' to '\r\n'.
        with open(csv_filename, 'w', newline='', encoding=TEXT_ENCODING) as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            csvwriter.writerows(rows)


def main(recipe_dir=recipe_txt_path, output_dir='first_clean_recipe',
         only_file=file_specific):
    """Parse the selected chapter file(s) and export them as CSV.

    The parsed rows are also stored in the module-level ``data`` dict.
    """
    data.update(build_dataset(recipe_dir, only_file))
    write_csv_files(data, output_dir)


if __name__ == "__main__":
    main()