Skip to content

Commit

Permalink
debug src_rec
Browse files Browse the repository at this point in the history
  • Loading branch information
xumi1993 committed Mar 31, 2024
1 parent 11164dc commit dd28e78
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 115,865 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,6 @@ dmypy.json
crust_model/
*.xmf

test_src_rec_eg
test_src_rec_eg
test_src_rec_sd.ipynb
test_src_rec_config.dat
136 changes: 93 additions & 43 deletions pytomoatt/src_rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,27 +247,43 @@ def read(cls, fname: str, dist_in_data=False, name_net_and_sta=False, **kwargs):
sr.rec_points_cr = alldf[
alldf[11].astype(str).str.contains("cr")
].reset_index(drop=True)
if not sr.rec_points_cr.empty:
if sr.rec_points_cr.shape[1] == last_col + 1:
if sr.rec_points_cr.shape[1] == last_col + 1:
if not sr.rec_points_cr.empty:
# add another column for weight
sr.rec_points_cr.loc[:, last_col + 1] = 1.0
# set column names and types
cols, data_type = setup_rec_points_dd(type='cr')
sr.rec_points_cr.columns = cols
sr.rec_points_cr = sr.rec_points_cr.astype(data_type)
else:
sr.rec_points_cr[last_col + 1] = pd.Series()
elif sr.rec_points_cr.shape[1] not in [last_col+1, last_col+2]:
sr.log.SrcReclog.error(
f"Only common receiver data with {last_col+1} or {last_col+2} columns are supported, "
"please check the format of common receiver data"
)
return sr
# set column names and types
cols, data_type = setup_rec_points_dd(type='cr')
sr.rec_points_cr.columns = cols
sr.rec_points_cr = sr.rec_points_cr.astype(data_type)

# read common source data
sr.rec_points_cs = alldf[
alldf[11].astype(str).str.contains("cs")
].reset_index(drop=True)
if not sr.rec_points_cs.empty:
if sr.rec_points_cs.shape[1] == last_col + 1:
if sr.rec_points_cs.shape[1] == last_col + 1:
if not sr.rec_points_cs.empty:
# add another column for weight
sr.rec_points_cs.loc[:, last_col + 1] = 1.0
# set column names and types
cols, data_type = setup_rec_points_dd(type='cs')
sr.rec_points_cs.columns = cols
sr.rec_points_cs = sr.rec_points_cs.astype(data_type)
else:
sr.rec_points_cs[last_col + 1] = pd.Series()
elif sr.rec_points_cs.shape[1] not in [last_col+1, last_col+2]:
sr.log.SrcReclog.error(
f"Only common source data with {last_col+1} or {last_col+2} columns are supported, "
"please check the format of common source data"
)
return sr
# set column names and types
cols, data_type = setup_rec_points_dd(type='cs')
sr.rec_points_cs.columns = cols
sr.rec_points_cs = sr.rec_points_cs.astype(data_type)

sr.update_unique_src_rec()
return sr
Expand Down Expand Up @@ -438,7 +454,7 @@ def reset_index(self):
)
self.rec_points_cr.reset_index(drop=True, inplace=True)

self.src_points["src_index"] = new_index
self.src_points.set_index(new_index, inplace=True)
self.src_points.index.name = "src_index"

# reset rec_index to be 0, 1, 2, ... for rec_points
Expand All @@ -463,7 +479,9 @@ def append(self, sr):

self.reset_index()
sr.reset_index()


self.log.SrcReclog.info(f"src_points before appending: {self.src_points.shape[0]}")
self.log.SrcReclog.info(f"rec_points before appending: {self._count_records()}")
# number of sources to be added
n_src_offset = self.src_points.shape[0]

Expand All @@ -487,7 +505,15 @@ def append(self, sr):
self.rec_points = pd.concat(
[self.rec_points, sr.rec_points], ignore_index=True
)

self.rec_points_cr = pd.concat(
[self.rec_points_cr, sr.rec_points_cr], ignore_index=True
)
self.rec_points_cs = pd.concat(
[self.rec_points_cs, sr.rec_points_cs], ignore_index=True
)

self.log.SrcReclog.info(f"src_points after appending: {self.src_points.shape[0]}")
self.log.SrcReclog.info(f"rec_points after appending: {self._count_records()}")
# store fnames
self.fnames.extend(sr.fnames)

Expand Down Expand Up @@ -532,6 +558,10 @@ def update_num_rec(self):
update num_rec in src_points by current rec_points
"""
self.src_points["num_rec"] = self.rec_points.groupby("src_index").size()
if not self.rec_points_cr.empty:
self.src_points["num_rec"] += self.rec_points_cr.groupby("src_index").size()
if not self.rec_points_cs.empty:
self.src_points["num_rec"] += self.rec_points_cs.groupby("src_index").size()

def update(self):
"""
Expand All @@ -551,6 +581,8 @@ def update(self):
# sort by src_index
self.src_points.sort_values(by=["src_index"], inplace=True)
self.rec_points.sort_values(by=["src_index", "rec_index"], inplace=True)
self.rec_points_cr.sort_values(by=["src_index", "rec_index"], inplace=True)
self.rec_points_cs.sort_values(by=["src_index", "rec_index1"], inplace=True)

def erase_src_with_no_rec(self):
"""
Expand Down Expand Up @@ -671,15 +703,23 @@ def select_phase(self, phase_list):
:param phase_list: List of phases for travel times used for inversion
:type phase_list: list of str
"""
if not isinstance(phase_list, (list, str)):
if not isinstance(phase_list, (list, tuple, str)):
raise TypeError("phase_list should be in list or str")
if isinstance(phase_list, str):
phase_list = [phase_list]
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self._count_records())
)
self.rec_points = self.rec_points[self.rec_points["phase"].isin(phase_list)]
self.rec_points_cs = self.rec_points_cs[
self.rec_points_cs["phase"].isin([f'{ph},cs' for ph in phase_list])
]
self.rec_points_cr = self.rec_points_cr[
self.rec_points_cr["phase"].isin([f'{ph},cr' for ph in phase_list])
]
self.update()
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self._count_records())
)

def select_by_datetime(self, time_range):
Expand All @@ -691,21 +731,21 @@ def select_by_datetime(self, time_range):
"""
# select source within this time range.
self.log.SrcReclog.info(
"src_points before selecting: {}".format(self.src_points.shape)
"src_points before selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self._count_records())
)
self.src_points = self.src_points[
(self.src_points["origin_time"] >= time_range[0])
& (self.src_points["origin_time"] <= time_range[1])
]
self.update()
self.log.SrcReclog.info(
"src_points after selecting: {}".format(self.src_points.shape)
"src_points after selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self._count_records())
)

def remove_specified_recs(self, rec_list):
Expand All @@ -732,10 +772,10 @@ def select_box_region(self, region):
"""
# select source within this region.
self.log.SrcReclog.info(
"src_points before selecting: {}".format(self.src_points.shape)
"src_points before selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self._count_records())
)
self.src_points = self.src_points[
(self.src_points["evlo"] >= region[0])
Expand All @@ -758,10 +798,10 @@ def select_box_region(self, region):
# Remove empty sources
self.update()
self.log.SrcReclog.info(
"src_points after selecting: {}".format(self.src_points.shape)
"src_points after selection: {}".format(self.src_points.shape)
)
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self.rec_points.shape)
)

def select_depth(self, dep_min_max):
Expand All @@ -770,23 +810,23 @@ def select_depth(self, dep_min_max):
:param dep_min_max: limit of depth, ``[dep_min, dep_max]``
:type dep_min_max: sequence
"""
self.log.SrcReclog.info('src_points before selecting: {}'.format(self.src_points.shape))
self.log.SrcReclog.info('src_points before selection: {}'.format(self.src_points.shape))
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self.rec_points.shape)
)
self.src_points = self.src_points[
(self.src_points['evdp'] >= dep_min_max[0]) &
(self.src_points['evdp'] <= dep_min_max[1])
]
self.update()
self.log.SrcReclog.info('src_points after selecting: {}'.format(self.src_points.shape))
self.log.SrcReclog.info('src_points after selection: {}'.format(self.src_points.shape))
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self.rec_points.shape)
)

def calc_distance(self):
"""Calculate epicentral distance"""
self.rec_points["dist"] = 0.0
self.rec_points["dist_deg"] = 0.0
rec_group = self.rec_points.groupby("src_index")
for idx, rec in rec_group:
dist = DistAZ(
Expand All @@ -795,19 +835,22 @@ def calc_distance(self):
rec["stla"].values,
rec["stlo"].values,
).delta
self.rec_points["dist"].loc[rec.index] = dist
self.rec_points["dist_deg"].loc[rec.index] = dist

def select_distance(self, dist_min_max, recalc_dist=False):
"""Select stations in a range of distance
.. note::
This criteria only works for absolute travel time data.
:param dist_min_max: limit of distance in deg, ``[dist_min, dist_max]``
:type dist_min_max: list or tuple
"""
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self._count_records())
)
# rec_group = self.rec_points.groupby('src_index')
if ("dist" not in self.rec_points) or recalc_dist:
if ("dist_deg" not in self.rec_points) or recalc_dist:
self.log.SrcReclog.info("Calculating epicentral distance...")
self.calc_distance()
elif not recalc_dist:
Expand All @@ -817,14 +860,14 @@ def select_distance(self, dist_min_max, recalc_dist=False):
"No such field of dist, please set up recalc_dist to True"
)
# for _, rec in rec_group:
mask = (self.rec_points["dist"] < dist_min_max[0]) | (
self.rec_points["dist"] > dist_min_max[1]
mask = (self.rec_points["dist_deg"] < dist_min_max[0]) | (
self.rec_points["dist_deg"] > dist_min_max[1]
)
drop_idx = self.rec_points[mask].index
self.rec_points = self.rec_points.drop(index=drop_idx)
self.update()
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self._count_records())
)

def select_by_num_rec(self, num_rec: int):
Expand All @@ -834,19 +877,19 @@ def select_by_num_rec(self, num_rec: int):
"""
self.update_num_rec()
self.log.SrcReclog.info(
"src_points before selecting: {}".format(self.src_points.shape)
"src_points before selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info(
"rec_points before selecting: {}".format(self.rec_points.shape)
"rec_points before selection: {}".format(self._count_records())
)
self.src_points = self.src_points[(self.src_points["num_rec"] >= num_rec)]
# self.remove_rec_by_new_src()
self.update()
self.log.SrcReclog.info(
"src_points after selecting: {}".format(self.src_points.shape)
"src_points after selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info(
"rec_points after selecting: {}".format(self.rec_points.shape)
"rec_points after selection: {}".format(self._count_records())
)

def select_one_event_in_each_subgrid(self, d_deg: float, d_km: float):
Expand All @@ -859,7 +902,7 @@ def select_one_event_in_each_subgrid(self, d_deg: float, d_km: float):
"""

self.log.SrcReclog.info(
"src_points before selecting: {}".format(self.src_points.shape)
"src_points before selection: {}".format(self.src_points.shape[0])
)
self.log.SrcReclog.info("processing... (this may take a few minutes)")

Expand Down Expand Up @@ -902,7 +945,7 @@ def select_one_event_in_each_subgrid(self, d_deg: float, d_km: float):
self.src_points = self.src_points.sort_index()

self.log.SrcReclog.info(
"src_points after selecting: {}".format(self.src_points.shape)
"src_points after selection: {}".format(self.src_points.shape[0])
)

# remove rec_points by new src_points
Expand All @@ -922,6 +965,13 @@ def count_events_per_station(self):
"num_events"
].transform("max")

def _count_records(self):
count = 0
count += self.rec_points.shape[0]
count += self.rec_points_cs.shape[0]
count += self.rec_points_cr.shape[0]
return count

def _calc_weights(self, lat, lon, scale):
points = pd.concat([lon, lat], axis=1)
points_rad = points * (np.pi / 180)
Expand Down
Loading

0 comments on commit dd28e78

Please sign in to comment.