-
Notifications
You must be signed in to change notification settings - Fork 4
/
read_logs.py
152 lines (139 loc) · 6.22 KB
/
read_logs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import shutil
import subprocess

import numpy as np
import pandas as pd
def hex2page(value):
    """Convert a hexadecimal threshold reading to dBm.

    The reading is interpreted as a base-16 integer ``n`` and mapped
    linearly to ``-111 + 0.488 * n``.

    value may be a string of hex digits (surrounding whitespace and an
    optional ``0x`` prefix are accepted by ``int``) or an integer whose
    decimal digits are to be read as hex digits.  The latter happens when
    pandas infers an integer dtype for a column in which every reading
    contains only the digits 0-9.

    Returns the converted value as a float.
    Raises ValueError if the text is not valid hexadecimal.
    """
    if isinstance(value, (int, np.integer)):
        # e.g. the reading "62" arrives as the integer 62 after dtype
        # inference; its digits are still hexadecimal, so go back to text.
        value = str(value)
    return -111 + int(value, 16) * 0.488
def check_log(filename, form='wtreg'):
    """Check that every line of a log file has a length valid for form.

    Each line is stripped of surrounding whitespace and its length (in
    bytes) is compared with the two lengths accepted for the format:
    'wtreg' -> 47 or 83, 'old7' -> 42 or 78, 'newok' -> 47 or 88.
    Every line that matches neither length is printed.

    What happens next depends on the number of bad lines:

    - none: the file is left untouched.
    - fewer than 26: the file is copied to [filename]_original and
      [filename] is rewritten without the bad lines.  If
      [filename]_original already exists nothing is changed.
    - 26 or more: the file is assumed to be in a different format and is
      moved to [filename]_original, so [filename] no longer exists
      afterwards.

    The file is handled with Python file operations only, so no external
    commands (mv, cp, sed, rm) are required.

    Returns None.
    Raises ValueError if form is not one of the known formats; errors from
    opening or writing the files propagate unchanged.
    """
    accepted_lengths = {
        'wtreg': (47, 83),
        'old7': (42, 78),
        'newok': (47, 88),
    }
    if form not in accepted_lengths:
        raise ValueError("Unknown log format %r; expected one of %s"
                         % (form, sorted(accepted_lengths)))
    valid = accepted_lengths[form]
    # At or above this many bad lines the file is treated as a different
    # format rather than a mostly good log with a few corrupt lines.
    too_many = 26
    backup = filename + '_original'

    # Binary mode keeps the surviving lines byte-for-byte intact when the
    # file is rewritten and tolerates undecodable bytes in corrupt lines.
    with open(filename, 'rb') as log:
        lines = log.readlines()

    kept = []
    n_bad = 0
    for line in lines:
        if len(line.strip()) in valid:
            kept.append(line)
        else:
            print(line.decode('utf-8', 'replace'))
            n_bad += 1

    if n_bad >= too_many:
        print("%s may be in a different format!" % (filename))
        print("Moving to '_original' and ignoring!")
        shutil.move(filename, backup)
    elif n_bad > 0:
        if os.path.isfile(backup):
            print("Original copies already exists for %s!" % (filename))
        else:
            print("%s has some bad lines," % (filename))
            print("original copy will be made and bad lines will be removed")
            shutil.copy(filename, backup)
            with open(filename, 'wb') as log:
                log.writelines(kept)
def parsing(filename, T_set='False', form='wtreg'):
    """Parse a fixed-width log file into a pandas DataFrame.

    filename must be in path/TxYYMMDD format; the character seven places
    from the end of filename is used as the station identifier.

    The log file is first run through check_log to make sure that there
    are no bad lines.  If check_log moves the file aside (too many bad
    lines) there is nothing left to parse and None is returned.

    Thresholds are converted from hex format to dBm with hex2page, and the
    'Datetime' column (format '%m/%d/%y %H:%M:%S') becomes the index.

    If T_set is 'True' (or the boolean True) only the thresholds,
    latitudes, longitudes and altitudes are returned, with the station
    identifier as a column-name suffix; otherwise the entire log file is
    parsed.

    form selects the column layout: 'wtreg', 'old7' or 'newok' (the latter
    has an extra 'Batt' column).  Raises ValueError for any other value.
    """
    base_names = ['ID', 'Datetime', 'Version', 'Threshold', '?',
                  'Triggers', 'GPS_Number', 'GPS_Mode', 'Temp',
                  'Lat', 'Lon', 'Alt']
    layouts = {
        'wtreg': {
            'widths': [1, 18, 4, 5, 12, 7, 3, 3, 3, 9, 10, 8],
            'subset': [1, 3, 9, 10, 11],
            'names': base_names,
        },
        'old7': {
            'widths': [1, 18, 4, 5, 7, 7, 3, 3, 3, 9, 10, 8],
            'subset': [1, 3, 9, 10, 11],
            'names': base_names,
        },
        'newok': {
            'widths': [1, 18, 4, 5, 12, 7, 3, 3, 4, 4, 9, 10, 8],
            'subset': [1, 3, 10, 11, 12],
            'names': ['ID', 'Datetime', 'Version', 'Threshold', '???',
                      'Triggers', 'GPS_Number', 'GPS_Mode', 'Temp', 'Batt',
                      'Lat', 'Lon', 'Alt'],
        },
    }
    if form not in layouts:
        raise ValueError("Unknown log format %r; expected one of %s"
                         % (form, sorted(layouts)))
    layout = layouts[form]

    check_log(filename, form)
    if not os.path.isfile(filename):
        # check_log moves files with too many bad lines to
        # [filename]_original, leaving nothing to parse here.
        return None

    subset_only = T_set is True or T_set == 'True'
    options = {
        'widths': layout['widths'],
        'names': layout['names'],
        # Keep both columns as text: the timestamp is converted below and
        # the threshold holds hex digits that must not be read as decimal
        # numbers by dtype inference.
        'dtype': {'Datetime': str, 'Threshold': str},
        'na_values': '\n',
    }
    if subset_only:
        options['usecols'] = layout['subset']
    df = pd.read_fwf(filename, **options)

    # Converted after reading rather than through read_fwf's date_parser
    # (deprecated in newer pandas) and pd.datetime (removed in pandas 2.0).
    df['Datetime'] = pd.to_datetime(df['Datetime'],
                                    format='%m/%d/%y %H:%M:%S')
    df['Threshold'] = df['Threshold'].apply(hex2page)

    if subset_only:
        station = filename[-7]
        df = df.rename(columns={'Threshold': 'Threshold_%s' % station,
                                'Lat': 'Lat_%s' % station,
                                'Lon': 'Lon_%s' % station,
                                'Alt': 'Alt_%s' % station})
    return df.set_index('Datetime')
def parsing_variable(filename, T_set='False'):
    """Parse a whitespace-aligned log file into a pandas DataFrame.

    filename must be in path/TxYYMMDD format; the character seven places
    from the end of filename is used as the station identifier.

    The log file is NOT run through check_log, as not all files will have
    the line widths that its quick check expects.  Column boundaries are
    left for pandas.read_fwf to infer.  If filename does not exist, None
    is returned.

    Thresholds are converted from hex format to dBm with hex2page.  The
    'Date' and 'time' columns are joined, parsed with the format
    '%m/%d/%y %H:%M:%S' and become the index, named 'Date_time'.

    If T_set is 'True' (or the boolean True) only the thresholds,
    latitudes, longitudes and altitudes are returned, with the station
    identifier as a column-name suffix; otherwise the entire log file is
    parsed.
    """
    if not os.path.isfile(filename):
        return None

    names = ['ID', 'Date', 'time', 'Version', 'Threshold', '?', '??',
             'Triggers', 'GPS_Number', 'GPS_Mode', 'Temp', 'Batt',
             'Lat', 'Lon', 'Alt']
    # Date, time, Threshold, Lat, Lon, Alt
    subset = [1, 2, 4, 12, 13, 14]

    subset_only = T_set is True or T_set == 'True'
    options = {
        'names': names,
        # Keep these as text: date and time are joined and converted
        # below, and the threshold holds hex digits that must not be read
        # as decimal numbers by dtype inference.
        'dtype': {'Date': str, 'time': str, 'Threshold': str},
        'na_values': '\n',
    }
    if subset_only:
        options['usecols'] = subset
    df = pd.read_fwf(filename, **options)

    # Join the two columns by name so the timestamp is always built from
    # 'Date' and 'time', whichever columns were selected, and convert it
    # here instead of via parse_dates/date_parser and pd.datetime, which
    # newer pandas releases deprecate or no longer provide.
    stamps = df.pop('Date') + ' ' + df.pop('time')
    df['Date_time'] = pd.to_datetime(stamps, format='%m/%d/%y %H:%M:%S')
    df['Threshold'] = df['Threshold'].apply(hex2page)

    if subset_only:
        station = filename[-7]
        df = df.rename(columns={'Threshold': 'Threshold_%s' % station,
                                'Lat': 'Lat_%s' % station,
                                'Lon': 'Lon_%s' % station,
                                'Alt': 'Alt_%s' % station})
    return df.set_index('Date_time')