-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrible2.py
41 lines (40 loc) · 1.47 KB
/
scrible2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
import cleanup_text_file
import csv
# Word-box CSV read by this script and the per-line CSV it writes.
# Raw strings keep the Windows backslashes literal on every Python version.
INPUT_CSV = r'D:\BillD\CSV\gofrugal.png_final_clean_20180827042607.csv'
OUTPUT_CSV = r'D:\BillD\CSV\gofrugal_part2.csv'

# Rows are kept only when X_MIN < X1 < X_MAX and Y_MIN < Y1 < Y_MAX.
X_MIN, X_MAX = 5, 665
Y_MIN, Y_MAX = 178, 251

# Open (low, high) intervals of X1.  A word whose X1 falls strictly inside one
# of them is prefixed with ',' instead of ' '.
# NOTE(review): presumably these are the left edges of the bill's table
# columns -- confirm.  (389, 390) can only match a non-integer X1.
COLUMN_START_RANGES = (
    (55, 62),
    (288, 304),
    (339, 385),
    (389, 390),
    (429, 433),
    (512, 515),
)


def separator_for(x1):
    """Return ',' if *x1* lies strictly inside a COLUMN_START_RANGES interval, else ' '."""
    for low, high in COLUMN_START_RANGES:
        if low < x1 < high:
            return ','
    return ' '


def crop_to_region(df):
    """Return the rows of *df* whose X1/Y1 fall strictly inside the region of interest."""
    inside = (
        (df['Y1'] > Y_MIN) & (df['Y1'] < Y_MAX)
        & (df['X1'] > X_MIN) & (df['X1'] < X_MAX)
    )
    return df.loc[inside]


def prepare_words(df):
    """Keep the box and word columns, renumber the rows and make 'Word' a string.

    The previous row labels are retained in an 'index' column, as
    ``reset_index()`` does by default.
    """
    # reset_index() returns a new frame, so the assignment below never writes
    # into a filtered view of the caller's data.
    words = df[['X1', 'Y1', 'X2', 'Y2', 'Word']].reset_index()
    words['Word'] = words['Word'].astype(str)
    return words


def join_lines(words):
    """Concatenate 'wordsepchar' per distinct Y1 value.

    Returns a one-column DataFrame ('wordsepchar') indexed by Y1.
    """
    joined = words.groupby('Y1')['wordsepchar'].apply(
        lambda group: group.str.cat(sep='', na_rep='?'))
    return joined.to_frame()


def main():
    """Read INPUT_CSV, build one separator-joined text line per Y1, write OUTPUT_CSV.

    The intermediate frames are printed along the way for inspection.
    """
    df = crop_to_region(pd.read_csv(INPUT_CSV, sep='|'))
    print(df)

    df = prepare_words(df)
    # Single-argument print() emits the same text on Python 2 and Python 3.
    for x1, word in zip(df['X1'], df['Word']):
        print(x1)
        print('%s %s' % (x1, word))

    df['sepchar'] = [separator_for(x1) for x1 in df['X1']]
    print(df)
    df['wordsepchar'] = df['sepchar'] + df['Word']
    print(df['wordsepchar'])

    lines = join_lines(df)
    print(list(lines.columns.values))
    print(lines)
    lines.to_csv(OUTPUT_CSV, quoting=csv.QUOTE_NONE, quotechar="", escapechar=" ")


if __name__ == "__main__":
    main()