diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index a5cac58..f29708a 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -11,10 +11,6 @@ on: config-path: required: true type: string - secrets: - token: - required: true - jobs: deploy: runs-on: ubuntu-latest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index a4bf006..7ed25e0 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -23,13 +23,13 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip - sudo pip install twine + pip install twine setuptools - name: Build package run: python setup.py sdist build - name: Publish package @@ -37,7 +37,10 @@ jobs: with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - - Build_docs: - uses: ./.github/workflows/build-docs.yml + Build_docs: + uses: MIGG-NTU/PyTomoATT/.github/workflows/build-docs.yml@devel + with: + config-path: .github/workflows/build-docs.yml + secrets: inherit + diff --git a/pytomoatt/attarray.py b/pytomoatt/attarray.py index 33ab94b..cb0faa9 100644 --- a/pytomoatt/attarray.py +++ b/pytomoatt/attarray.py @@ -11,6 +11,11 @@ def __init__(self, data_vars, coords, attrs=None) -> None: __slots__ = () super().__init__(data_vars, coords, attrs) + @classmethod + def from_xarray(cls, dataset): + ds = cls(dataset.data_vars, dataset.coords) + return ds + def interp_dep(self, depth:float, field:str): """Interpolate map view with given depth diff --git a/pytomoatt/checkerboard.py b/pytomoatt/checkerboard.py index 9735548..2d3dfc5 100644 --- a/pytomoatt/checkerboard.py +++ b/pytomoatt/checkerboard.py @@ -103,8 +103,8 @@ def checkerboard(self, period_x, period_y, period_z, self.dlnv = self.perturbation*pert_vel self.epsilon = np.abs(self.perturbation)*pert_ani self.phi = np.zeros_like(self.vel) - self.phi[np.where(self.perturbation>0)] = 120. - self.phi[np.where(self.perturbation<0)] = 60. + self.phi[np.where(self.perturbation>0)] = 135. + self.phi[np.where(self.perturbation<0)] = 45. 
self.xi = self.epsilon*cosd(2*self.phi) self.eta = self.epsilon*sind(2*self.phi) diff --git a/pytomoatt/data.py b/pytomoatt/data.py index e2a658e..32bdb39 100644 --- a/pytomoatt/data.py +++ b/pytomoatt/data.py @@ -6,6 +6,15 @@ class ATTData(): + """Read data from HDF5 or ASCII file + + :param fname: Path to data file + :type fname: str + :param fname_params: Path to input parameter file + :type fname_params: str + :param fname_grid: Path to grid file + :type fname_grid: str + """ def __init__(self, fname:str, fname_params:str, fname_grid='OUTPUT_FILES/out_data_grid.h5'): @@ -37,6 +46,23 @@ def read(cls, fname:str, fname_params:str, fname_grid='OUTPUT_FILES/out_data_grid.h5', group_name='model', dataset_name=None, format='hdf5'): + """Read data from HDF5 or ASCII file + + :param fname: Path to data file + :type fname: str + :param fname_params: Path to input parameter file + :type fname_params: str + :param fname_grid: Path to grid file + :type fname_grid: str + :param group_name: Name of the group in the HDF5 file + :type group_name: str + :param dataset_name: Name of the dataset in the HDF5 file + :type dataset_name: str + :param format: Format of the data file, defaults to 'hdf5' + :type format: str, optional + :return: An instance of ATTData + :rtype: ATTData + """ attdata = cls(fname, fname_params, fname_grid) attdata.format = format # attdata.group_name = group_name diff --git a/pytomoatt/distaz.py b/pytomoatt/distaz.py index 781cd9d..22b4be7 100644 --- a/pytomoatt/distaz.py +++ b/pytomoatt/distaz.py @@ -3,6 +3,22 @@ class DistAZ: """ + DistAZ class + + Calculate the distance, azimuth and back-azimuth between two points on the + Earth's surface. + + :param lat1: Latitude of point 1 + :type lat1: float + :param lon1: Longitude of point 1 + :type lon1: float + :param lat2: Latitude of point 2 + :type lat2: float + :param lon2: Longitude of point 2 + :type lon2: float + :return: An instance of DistAZ + :rtype: DistAZ + Subroutine to calculate the Great Circle Arc distance between two sets of geographic coordinates @@ -44,14 +60,13 @@ def __init__(self, lat1, lon1, lat2, lon2): ''' rad = 2. * np.pi / 360.0 - """ - c - c scolat and ecolat are the geocentric colatitudes - c as defined by Richter (pg. 318) - c - c Earth Flattening of 1/298.257 take from Bott (pg. 3) - c - """ + ''' + scolat and ecolat are the geocentric colatitudes + as defined by Richter (pg. 318) + + Earth Flattening of 1/298.257 take from Bott (pg. 3) + + ''' sph = 1.0 / 298.257 scolat = np.pi / 2.0 - np.arctan((1. - sph) * (1. - sph) * np.tan(lat1 * rad)) @@ -59,10 +74,10 @@ def __init__(self, lat1, lon1, lat2, lon2): slon = lon1 * rad elon = lon2 * rad """ - c - c a - e are as defined by Bullen (pg. 154, Sec 10.2) - c These are defined for the pt. 1 - c + + a - e are as defined by Bullen (pg. 154, Sec 10.2) + These are defined for the pt. 
1 + """ a = np.sin(scolat) * np.cos(slon) b = np.sin(scolat) * np.sin(slon) diff --git a/pytomoatt/model.py b/pytomoatt/model.py index 671635c..c50475d 100644 --- a/pytomoatt/model.py +++ b/pytomoatt/model.py @@ -5,7 +5,7 @@ from .io.crustmodel import CrustModel from .io.asciimodel import ASCIIModel from .attarray import Dataset -from .utils import init_axis, acosd +from .utils import init_axis, acosd, atand class ATTModel(): @@ -68,8 +68,12 @@ def to_ani(self): """ self.epsilon = np.sqrt(self.eta**2+self.xi**2) self.phi = np.zeros_like(self.epsilon) - idx_non_zero = np.where(self.epsilon != 0) - self.phi[idx_non_zero] = 0.5*acosd(self.xi[idx_non_zero]/self.epsilon[idx_non_zero]) + idx = np.where(self.xi <= 0) + self.phi[idx] = 90 + 0.5*atand(self.eta[idx]/self.xi[idx]) + idx = np.where((self.xi > 0) & (self.eta <= 0)) + self.phi[idx] = 180 + 0.5*atand(self.eta[idx]/self.xi[idx]) + idx = np.where((self.xi > 0) & (self.eta > 0)) + self.phi[idx] = 0.5*atand(self.eta[idx]/self.xi[idx]) def to_xarray(self): """Convert to xarray @@ -139,6 +143,14 @@ def smooth(self, sigma=5.0): sigma_all[0:2] /= 111.19 self.vel = gaussian_filter(self.vel, sigma) + def calc_dv_avg(self): + """calculate anomalies relative to average velocity at each depth + """ + self.dlnv = np.zeros_like(self.vel) + for i, _ in enumerate(self.depths): + avg = np.mean(self.vel[i, :, :]) + self.dlnv[i, :, :] = 100 * (self.vel[i, :, :] - avg)/avg + def calc_dv(self, ref_mod_fname: str): """calculate anomalies relative to another model diff --git a/pytomoatt/src_rec.py b/pytomoatt/src_rec.py index 5d466ae..5b833d4 100644 --- a/pytomoatt/src_rec.py +++ b/pytomoatt/src_rec.py @@ -6,9 +6,12 @@ from .utils import WGS84_to_cartesian from scipy.spatial import distance from sklearn.metrics.pairwise import haversine_distances +import copy + pd.options.mode.chained_assignment = None # default='warn' -class SrcRec(): + +class SrcRec: """I/O for source <--> receiver file :param fname: Path to src_rec file @@ -16,12 +19,14 @@ class SrcRec(): :param src_only: Whether to read only source information, defaults to False :type src_only: bool, optional """ + def __init__(self, fname: str, src_only=False) -> None: - """ - """ + """ """ self.src_only = src_only self.src_points = None self.rec_points = None + self.sources = None + self.receivers = None self.fnames = [fname] self.log = SetupLog() @@ -38,7 +43,7 @@ def src_points(self): :return: All sources :rtype: pandas.DataFrame - + Sources contain 8 columns: ================ =================================================== @@ -61,7 +66,7 @@ def src_points(self, value): if value is None or isinstance(value, pd.DataFrame): self._src_points = value else: - raise TypeError('src_points should be in DataFrame') + raise TypeError("src_points should be in DataFrame") @property def rec_points(self): @@ -69,10 +74,10 @@ def rec_points(self): :return: All receivers :rtype: pandas.DataFrame - + Receivers contain 9 ~ 11 columns: - Common fields + Common fields ----------------- ================ ===================================================== @@ -101,16 +106,16 @@ def rec_points(self): """ return self._rec_points - + @rec_points.setter def rec_points(self, value): if value is None or isinstance(value, pd.DataFrame): self._rec_points = value else: - raise TypeError('rec_points should be in DataFrame') + raise TypeError("rec_points should be in DataFrame") @classmethod - def read(cls, fname:str, dist_in_data=False, name_net_and_sta=False, **kwargs): + def read(cls, fname: str, dist_in_data=False, 
name_net_and_sta=False, **kwargs): """Read source <--> receiver file to pandas.DataFrame :param fname: Path to src_rec file @@ -123,20 +128,21 @@ def read(cls, fname:str, dist_in_data=False, name_net_and_sta=False, **kwargs): :rtype: SrcRec """ sr = cls(fname=fname, **kwargs) - alldf = pd.read_table(fname, sep='\s+|\t', engine='python', - header=None, comment="#") + alldf = pd.read_table( + fname, sep="\s+|\t", engine="python", header=None, comment="#" + ) last_col_src = 12 # this is a source line if the last column is not NaN sr.src_points = alldf[pd.notna(alldf[last_col_src])] # add weight column if not included - if sr.src_points.shape[1] == last_col_src+1: + if sr.src_points.shape[1] == last_col_src + 1: # add another column for weight - sr.src_points.loc[:, last_col_src+1] = 1.0 + sr.src_points.loc[:, last_col_src + 1] = 1.0 # src id dataframe sr.src_points.index = sr.src_points.iloc[:, 0] - sr.src_points.index.name = 'src_index' + sr.src_points.index.name = "src_index" # event datetime dataframe datedf = sr.src_points.loc[:, 1:6] @@ -153,27 +159,38 @@ def read(cls, fname:str, dist_in_data=False, name_net_and_sta=False, **kwargs): except: sr.log.SrcReclog.error("please check the date format in the src_rec file") return sr.src_points - dateseris = datedf.astype(str).apply(lambda x: '.'.join(x), axis=1).apply( - pd.to_datetime, format='%Y.%m.%d.%H.%M.%S.%f') - dateseris.name = 'origin_time' + dateseris = ( + datedf.astype(str) + .apply(lambda x: ".".join(x), axis=1) + .apply(pd.to_datetime, format="%Y.%m.%d.%H.%M.%S.%f") + ) + dateseris.name = "origin_time" # event data dataframe src_data = sr.src_points.loc[:, 7:] - src_data.columns = ['evla', 'evlo', - 'evdp', 'mag', 'num_rec', - 'event_id', 'weight'] + src_data.columns = [ + "evla", + "evlo", + "evdp", + "mag", + "num_rec", + "event_id", + "weight", + ] type_dict = { - 'evla': float, - 'evlo': float, - 'evdp': float, - 'mag': float, - 'num_rec': int, - 'event_id': str, - 'weight': float + "evla": float, + "evlo": float, + "evdp": float, + "mag": float, + "num_rec": int, + "event_id": str, + "weight": float, } try: src_data = src_data.astype(type_dict) except: - sr.log.SrcReclog.error("please check the event data format in the src_rec file") + sr.log.SrcReclog.error( + "please check the event data format in the src_rec file" + ) return sr.src_points # concat all the 3 dataframes @@ -191,88 +208,161 @@ def read(cls, fname:str, dist_in_data=False, name_net_and_sta=False, **kwargs): last_col += 1 # extract the rows if the last_col is not NaN and the 12th column is NaN - sr.rec_points = alldf[(alldf[last_col].notna()) - & (alldf[last_col_src].isna())].reset_index(drop=True) + sr.rec_points = alldf[ + (alldf[last_col].notna()) & (alldf[last_col_src].isna()) + ].reset_index(drop=True) # add weight column if not included - if sr.rec_points.loc[:, last_col+1].isna().all(): - sr.rec_points.loc[:, last_col+1] = 1.0 + if sr.rec_points.loc[:, last_col + 1].isna().all(): + sr.rec_points.loc[:, last_col + 1] = 1.0 # warning if weigh value is greater than 10 - if (sr.rec_points.loc[:, last_col+1] > 10).any(): - sr.log.SrcReclog.warning(""" + if (sr.rec_points.loc[:, last_col + 1] > 10).any(): + sr.log.SrcReclog.warning( + """ at least one weight value is greater than 10. Probably your src_rec file includes distance data. 
-In this case, please set dist_in_data=True and read again.""") +In this case, please set dist_in_data=True and read again.""" + ) # extract only the first part of columns (cut unnecessary columns) - sr.rec_points = sr.rec_points.loc[:, :last_col+1] + sr.rec_points = sr.rec_points.loc[:, : last_col + 1] if name_net_and_sta == False: if not dist_in_data: sr.rec_points.columns = [ - 'src_index', 'rec_index', 'staname', - 'stla', 'stlo', 'stel', 'phase', 'tt', 'weight' + "src_index", + "rec_index", + "staname", + "stla", + "stlo", + "stel", + "phase", + "tt", + "weight", ] else: sr.rec_points.columns = [ - 'src_index', 'rec_index', 'staname', - 'stla', 'stlo', 'stel', 'phase', 'dist_deg', 'tt', 'weight' + "src_index", + "rec_index", + "staname", + "stla", + "stlo", + "stel", + "phase", + "dist_deg", + "tt", + "weight", ] else: if not dist_in_data: sr.rec_points.columns = [ - 'src_index', 'rec_index', 'netname', 'staname', - 'stla', 'stlo', 'stel', 'phase', 'tt', 'weight' + "src_index", + "rec_index", + "netname", + "staname", + "stla", + "stlo", + "stel", + "phase", + "tt", + "weight", ] else: sr.rec_points.columns = [ - 'src_index', 'rec_index', 'netname', 'staname', - 'stla', 'stlo', 'stel', 'phase', 'dist_deg', 'tt', 'weight' + "src_index", + "rec_index", + "netname", + "staname", + "stla", + "stlo", + "stel", + "phase", + "dist_deg", + "tt", + "weight", ] - + # change type of rec_index to int + sr.rec_points["rec_index"] = sr.rec_points["rec_index"].astype(int) # concatenate network and station name with "_" - sr.rec_points['staname'] = sr.rec_points['netname'] + '_' + sr.rec_points['staname'] + sr.rec_points["staname"] = ( + sr.rec_points["netname"] + "_" + sr.rec_points["staname"] + ) # drop network name column - sr.rec_points.drop('netname', axis=1, inplace=True) - + sr.rec_points.drop("netname", axis=1, inplace=True) + # define src and rec list + sr.sources = sr.src_points[ + ["event_id", "evla", "evlo", "evdp", "weight"] + ] + sr.receivers = sr.rec_points[ + ["staname", "stla", "stlo", "stel", "weight"] + ].drop_duplicates() return sr - def write(self, fname='src_rec_file'): + def write(self, fname="src_rec_file"): """Write sources and receivers to ASCII file for TomoATT :param fname: Path to the src_rec file, defaults to 'src_rec_file' :type fname: str, optional """ - with open(fname, 'w') as f: - for idx, src in tqdm.tqdm(self.src_points.iterrows(), - total=len(self.src_points)): - time_lst = src['origin_time'].strftime('%Y_%m_%d_%H_%M_%S.%f').split('_') - f.write('{:d} {} {} {} {} {} {} {:.4f} {:.4f} {:.4f} {:.4f} {} {} {:.4f}\n'.format( - idx, *time_lst, src['evla'], src['evlo'], src['evdp'], - src['mag'], src['num_rec'], src['event_id'], src['weight'] - )) + with open(fname, "w") as f: + for idx, src in tqdm.tqdm( + self.src_points.iterrows(), total=len(self.src_points) + ): + time_lst = ( + src["origin_time"].strftime("%Y_%m_%d_%H_%M_%S.%f").split("_") + ) + f.write( + "{:d} {} {} {} {} {} {} {:.4f} {:.4f} {:.4f} {:.4f} {} {} {:.4f}\n".format( + idx, + *time_lst, + src["evla"], + src["evlo"], + src["evdp"], + src["mag"], + src["num_rec"], + src["event_id"], + src["weight"], + ) + ) if self.src_only: continue - rec_data = self.rec_points[self.rec_points['src_index']==idx] + rec_data = self.rec_points[self.rec_points["src_index"] == idx] for _, rec in rec_data.iterrows(): - f.write(' {:d} {:d} {} {:6.4f} {:6.4f} {:6.4f} {} {:6.4f} {:6.4f}\n'.format( - idx, rec['rec_index'], rec['staname'], rec['stla'], - rec['stlo'], rec['stel'], rec['phase'], rec['tt'], rec['weight'] - )) + 
f.write( + " {:d} {:d} {} {:6.4f} {:6.4f} {:6.4f} {} {:6.4f} {:6.4f}\n".format( + idx, + rec["rec_index"], + rec["staname"], + rec["stla"], + rec["stlo"], + rec["stel"], + rec["phase"], + rec["tt"], + rec["weight"], + ) + ) + + def copy(self): + """Return a copy of SrcRec object + + :return: Copy of SrcRec object + :rtype: SrcRec + """ + return copy.deepcopy(self) def reset_index(self): - """Reset index of source and receivers. - """ + """Reset index of source and receivers.""" # reset src_index to be 0, 1, 2, ... for both src_points and rec_points - self.rec_points['src_index'] = self.rec_points['src_index'].map( - dict(zip(self.src_points.index, np.arange(len(self.src_points))))) + self.rec_points["src_index"] = self.rec_points["src_index"].map( + dict(zip(self.src_points.index, np.arange(len(self.src_points)))) + ) self.src_points.index = np.arange(len(self.src_points)) - self.src_points.index.name = 'src_index' + self.src_points.index.name = "src_index" # reset rec_index to be 0, 1, 2, ... for rec_points - self.rec_points['rec_index'] = self.rec_points.groupby('src_index').cumcount() - #sr.rec_points['rec_index'] = sr.rec_points['rec_index'].astype(int) + self.rec_points["rec_index"] = self.rec_points.groupby("src_index").cumcount() + # sr.rec_points['rec_index'] = sr.rec_points['rec_index'].astype(int) def append(self, sr): """ @@ -282,32 +372,37 @@ def append(self, sr): :type sr: SrcRec """ if not isinstance(sr, SrcRec): - raise TypeError('Input must be a SrcRec object') + raise TypeError("Input must be a SrcRec object") if self.src_only != sr.src_only: - raise ValueError('Cannot append src_only and non-src_only SrcRec objects') + raise ValueError("Cannot append src_only and non-src_only SrcRec objects") + + self.reset_index() + sr.reset_index() # number of sources to be added n_src_offset = self.src_points.shape[0] # add column for source file tag if not included - if 'fname' not in self.src_points.columns: - self.src_points['fname'] = self.fnames[0] - self.rec_points['fname'] = self.fnames[0] - if 'fname' not in sr.src_points.columns: - sr.src_points['fname'] = sr.fnames[0] - sr.rec_points['fname'] = sr.fnames[0] + if "fname" not in self.src_points.columns: + self.src_points["fname"] = self.fnames[0] + self.rec_points["fname"] = self.fnames[0] + if "fname" not in sr.src_points.columns: + sr.src_points["fname"] = sr.fnames[0] + sr.rec_points["fname"] = sr.fnames[0] # append src_points self.src_points = pd.concat([self.src_points, sr.src_points], ignore_index=True) - self.src_points.index.name = 'src_index' - self.src_points.index += 1 # start from 1 + self.src_points.index.name = "src_index" + # self.src_points.index += 1 # start from 1 if not self.src_only: # update src_index in rec_points - sr.rec_points['src_index'] += n_src_offset + sr.rec_points["src_index"] += n_src_offset # append rec_points - self.rec_points = pd.concat([self.rec_points, sr.rec_points], ignore_index=True) + self.rec_points = pd.concat( + [self.rec_points, sr.rec_points], ignore_index=True + ) # store fnames self.fnames.extend(sr.fnames) @@ -317,31 +412,40 @@ def remove_rec_by_new_src(self, verbose=True): remove rec_points by new src_points """ if verbose: - self.log.SrcReclog.info('rec_points before removing: {}'.format(self.rec_points.shape)) - self.rec_points = self.rec_points[self.rec_points['src_index'].isin(self.src_points.index)] + self.log.SrcReclog.info( + "rec_points before removing: {}".format(self.rec_points.shape) + ) + self.rec_points = self.rec_points[ + 
self.rec_points["src_index"].isin(self.src_points.index) + ] if verbose: - self.log.SrcReclog.info('rec_points after removing: {}'.format(self.rec_points.shape)) - + self.log.SrcReclog.info( + "rec_points after removing: {}".format(self.rec_points.shape) + ) + def remove_src_by_new_rec(self): - """remove src_points by new receivers - """ - self.src_points = self.src_points[self.src_points.index.isin(self.rec_points['src_index'])] + """remove src_points by new receivers""" + self.src_points = self.src_points[ + self.src_points.index.isin(self.rec_points["src_index"]) + ] def update_num_rec(self): """ update num_rec in src_points by current rec_points """ - self.src_points['num_rec'] = self.rec_points.groupby('src_index').size() + self.src_points["num_rec"] = self.rec_points.groupby("src_index").size() def erase_src_with_no_rec(self): """ erase src_points with no rec_points """ - print('src_points before removing: ', self.src_points.shape) - self.src_points = self.src_points[self.src_points['num_rec'] > 0] - print('src_points after removing: ', self.src_points.shape) + print("src_points before removing: ", self.src_points.shape) + self.src_points = self.src_points[self.src_points["num_rec"] > 0] + print("src_points after removing: ", self.src_points.shape) - def erase_duplicate_events(self, thre_deg:float, thre_dep:float, thre_time_in_min:float): + def erase_duplicate_events( + self, thre_deg: float, thre_dep: float, thre_time_in_min: float + ): """ check and count how many events are duplicated, under given threshold of distance, depth, and time. @@ -360,46 +464,94 @@ def erase_duplicate_events(self, thre_deg:float, thre_dep:float, thre_time_in_mi num_duplicated = 99999 iter_count = 0 - while (num_duplicated > 0): - + while num_duplicated > 0: # difference with row +1 - self.src_points['diff_evlo+1'] = self.src_points['evlo'].diff().abs() - self.src_points['diff_evla+1'] = self.src_points['evla'].diff().abs() - self.src_points['diff_evdp+1'] = self.src_points['evdp'].diff().abs() - self.src_points['diff_time+1'] = self.src_points['origin_time'].diff().abs() - self.src_points['diff_nrec+1'] = self.src_points['num_rec'].diff() + self.src_points["diff_evlo+1"] = self.src_points["evlo"].diff().abs() + self.src_points["diff_evla+1"] = self.src_points["evla"].diff().abs() + self.src_points["diff_evdp+1"] = self.src_points["evdp"].diff().abs() + self.src_points["diff_time+1"] = self.src_points["origin_time"].diff().abs() + self.src_points["diff_nrec+1"] = self.src_points["num_rec"].diff() # difference with row -1 - self.src_points['diff_evlo-1'] = self.src_points['evlo'].diff(periods=-1).abs() - self.src_points['diff_evla-1'] = self.src_points['evla'].diff(periods=-1).abs() - self.src_points['diff_evdp-1'] = self.src_points['evdp'].diff(periods=-1).abs() - self.src_points['diff_time-1'] = self.src_points['origin_time'].diff(periods=-1).abs() - self.src_points['diff_nrec-1'] = self.src_points['num_rec'].diff(periods=-1) - - self.src_points["duplicated+1"] = self.src_points.apply(lambda x: 1 if x['diff_evlo+1'] 0) - self.src_points = self.src_points[~((self.src_points['duplicated-1']==1) & (self.src_points['diff_nrec-1']<=0))] + self.src_points = self.src_points[ + ~( + (self.src_points["duplicated-1"] == 1) + & (self.src_points["diff_nrec-1"] <= 0) + ) + ] # print iterate count and number of rows, number of duplicated rows - num_duplicated = self.src_points[(self.src_points["duplicated+1"]==1) | (self.src_points["duplicated-1"]==1)].shape[0] - self.log.SrcReclog.info("iteration: {}; num_duplicated: 
{}".format(iter_count, num_duplicated)) + num_duplicated = self.src_points[ + (self.src_points["duplicated+1"] == 1) + | (self.src_points["duplicated-1"] == 1) + ].shape[0] + self.log.SrcReclog.info( + "iteration: {}; num_duplicated: {}".format(iter_count, num_duplicated) + ) iter_count += 1 # erase all columns starting with diff_* - self.src_points.drop(self.src_points.columns[self.src_points.columns.str.startswith('diff_')], axis=1, inplace=True) + self.src_points.drop( + self.src_points.columns[self.src_points.columns.str.startswith("diff_")], + axis=1, + inplace=True, + ) # erase all clumuns starting with duplicated - self.src_points.drop(self.src_points.columns[self.src_points.columns.str.startswith('duplicated')], axis=1, inplace=True) + self.src_points.drop( + self.src_points.columns[ + self.src_points.columns.str.startswith("duplicated") + ], + axis=1, + inplace=True, + ) # remove rec_points by new src_points self.remove_rec_by_new_src() # sort by src_index - self.src_points.sort_values(by=['src_index'], inplace=True) - self.rec_points.sort_values(by=['src_index', 'rec_index'], inplace=True) + self.src_points.sort_values(by=["src_index"], inplace=True) + self.rec_points.sort_values(by=["src_index", "rec_index"], inplace=True) def select_phase(self, phase_list): """ @@ -409,17 +561,68 @@ def select_phase(self, phase_list): :type phase_list: list of str """ if not isinstance(phase_list, (list, str)): - raise TypeError('phase_list should be in list or str') - self.log.SrcReclog.info('rec_points before selecting: {}'.format(self.rec_points.shape)) - self.rec_points = self.rec_points[self.rec_points['phase'].isin(phase_list)] - self.log.SrcReclog.info('rec_points after selecting: {}'.format(self.rec_points.shape)) + raise TypeError("phase_list should be in list or str") + self.log.SrcReclog.info( + "rec_points before selecting: {}".format(self.rec_points.shape) + ) + self.rec_points = self.rec_points[self.rec_points["phase"].isin(phase_list)] + self.log.SrcReclog.info( + "rec_points after selecting: {}".format(self.rec_points.shape) + ) # modify num_rec in src_points - self.src_points['num_rec'] = self.rec_points.groupby('src_index').size() + self.src_points["num_rec"] = self.rec_points.groupby("src_index").size() # sort by src_index - self.src_points.sort_values(by=['src_index'], inplace=True) - self.rec_points.sort_values(by=['src_index', 'rec_index'], inplace=True) + self.src_points.sort_values(by=["src_index"], inplace=True) + self.rec_points.sort_values(by=["src_index", "rec_index"], inplace=True) + + def select_by_datetime(self, time_range): + """ + select sources and station in a time range + + :param time_range: Time range defined as [start_time, end_time] + :type time_range: iterable + """ + # select source within this time range. 
+ self.log.SrcReclog.info( + "src_points before selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points before selecting: {}".format(self.rec_points.shape) + ) + self.src_points = self.src_points[ + (self.src_points["origin_time"] >= time_range[0]) + & (self.src_points["origin_time"] <= time_range[1]) + ] + + # Remove receivers whose events have been removed + self.remove_rec_by_new_src(verbose=False) + + self.reset_index() + self.log.SrcReclog.info( + "src_points after selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points after selecting: {}".format(self.rec_points.shape) + ) + + def remove_specified_recs(self, rec_list): + """Remove specified receivers + + :param rec_list: List of receivers to be removed + :type rec_list: list + """ + self.log.SrcReclog.info( + "rec_points before removing: {}".format(self.rec_points.shape) + ) + self.rec_points = self.rec_points[~self.rec_points["staname"].isin(rec_list)] + self.remove_src_by_new_rec() + self.update_num_rec() + self.reset_index() + self.log.SrcReclog.info( + "rec_points after removing: {}".format(self.rec_points.shape) + ) def select_box_region(self, region): """ @@ -429,13 +632,17 @@ def select_box_region(self, region): :type region: iterable """ # select source within this region. - self.log.SrcReclog.info('src_points before selecting: {}'.format(self.src_points.shape)) - self.log.SrcReclog.info('rec_points before selecting: {}'.format(self.rec_points.shape)) + self.log.SrcReclog.info( + "src_points before selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points before selecting: {}".format(self.rec_points.shape) + ) self.src_points = self.src_points[ - (self.src_points['evlo'] >= region[0]) & - (self.src_points['evlo'] <= region[1]) & - (self.src_points['evla'] >= region[2]) & - (self.src_points['evla'] <= region[3]) + (self.src_points["evlo"] >= region[0]) + & (self.src_points["evlo"] <= region[1]) + & (self.src_points["evla"] >= region[2]) + & (self.src_points["evla"] <= region[3]) ] # Remove receivers whose events have been removed @@ -443,17 +650,24 @@ def select_box_region(self, region): # Remove rest receivers out of region. self.rec_points = self.rec_points[ - (self.rec_points['stlo'] >= region[0]) & - (self.rec_points['stlo'] <= region[1]) & - (self.rec_points['stla'] >= region[2]) & - (self.rec_points['stla'] <= region[3]) + (self.rec_points["stlo"] >= region[0]) + & (self.rec_points["stlo"] <= region[1]) + & (self.rec_points["stla"] >= region[2]) + & (self.rec_points["stla"] <= region[3]) ] # Remove empty sources - self.src_points = self.src_points[self.src_points.index.isin(self.rec_points['src_index'])] + self.src_points = self.src_points[ + self.src_points.index.isin(self.rec_points["src_index"]) + ] self.update_num_rec() - self.log.SrcReclog.info('src_points after selecting: {}'.format(self.src_points.shape)) - self.log.SrcReclog.info('rec_points after selecting: {}'.format(self.rec_points.shape)) + self.reset_index() + self.log.SrcReclog.info( + "src_points after selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points after selecting: {}".format(self.rec_points.shape) + ) def select_depth(self, dep_min_max): """Select sources in a range of depth @@ -471,81 +685,126 @@ def select_depth(self, dep_min_max): self.log.SrcReclog.info('src_points after selecting: {}'.format(self.src_points.shape)) def calc_distance(self): - """Calculate epicentral distance - """ - self.rec_points['dist'] = 0. 
- rec_group = self.rec_points.groupby('src_index') + """Calculate epicentral distance""" + self.rec_points["dist"] = 0.0 + rec_group = self.rec_points.groupby("src_index") for idx, rec in rec_group: dist = DistAZ( - self.src_points.loc[idx]['evla'], - self.src_points.loc[idx]['evlo'], - rec['stla'].values, rec['stlo'].values + self.src_points.loc[idx]["evla"], + self.src_points.loc[idx]["evlo"], + rec["stla"].values, + rec["stlo"].values, ).delta - self.rec_points['dist'].loc[rec.index] = dist + self.rec_points["dist"].loc[rec.index] = dist def select_distance(self, dist_min_max, recalc_dist=False): - """Select stations in a range of distance + """Select stations in a range of distance - :param dist_min_max: limit of distance, ``[dist_min, dist_max]`` + :param dist_min_max: limit of distance in deg, ``[dist_min, dist_max]`` :type dist_min_max: list or tuple """ - self.log.SrcReclog.info('rec_points before selecting: {}'.format(self.rec_points.shape)) + self.log.SrcReclog.info( + "rec_points before selecting: {}".format(self.rec_points.shape) + ) # rec_group = self.rec_points.groupby('src_index') - if ('dist' not in self.rec_points) or recalc_dist: - self.log.SrcReclog.info('Calculating epicentral distance...') + if ("dist" not in self.rec_points) or recalc_dist: + self.log.SrcReclog.info("Calculating epicentral distance...") self.calc_distance() elif not recalc_dist: pass else: - self.log.SrcReclog.error('No such field of dist, please set up recalc_dist to True') + self.log.SrcReclog.error( + "No such field of dist, please set up recalc_dist to True" + ) # for _, rec in rec_group: - mask = (self.rec_points['dist'] < dist_min_max[0]) \ | (self.rec_points['dist'] > dist_min_max[1]) + mask = (self.rec_points["dist"] < dist_min_max[0]) | ( + self.rec_points["dist"] > dist_min_max[1] + ) drop_idx = self.rec_points[mask].index self.rec_points = self.rec_points.drop(index=drop_idx) self.remove_src_by_new_rec() self.update_num_rec() - self.log.SrcReclog.info('rec_points after selecting: {}'.format(self.rec_points.shape)) + self.reset_index() + self.log.SrcReclog.info( + "rec_points after selecting: {}".format(self.rec_points.shape) + ) + + def select_by_num_rec(self, num_rec: int): + """Select sources whose number of receivers is greater than or equal to a threshold + + :param num_rec: threshold of minimum receiver number + :type num_rec: int + """ + self.update_num_rec() + self.log.SrcReclog.info( + "src_points before selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points before selecting: {}".format(self.rec_points.shape) + ) + self.src_points = self.src_points[(self.src_points["num_rec"] >= num_rec)] + self.remove_rec_by_new_src(False) + self.log.SrcReclog.info( + "src_points after selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info( + "rec_points after selecting: {}".format(self.rec_points.shape) + ) - def select_one_event_in_each_subgrid(self, d_deg:float, d_km:float): - """ select one event in each subgrid - - :param d_deg: grid size in degree + def select_one_event_in_each_subgrid(self, d_deg: float, d_km: float): + """select one event in each subgrid + + :param d_deg: grid size along lat and lon in degree :type d_deg: float - :param d_km: grid size in km + :param d_km: grid size along depth axis in km :type d_km: float """ - - self.log.SrcReclog.info('src_points before selecting: {}'.format(self.src_points.shape)) - self.log.SrcReclog.info('processing... 
(this may take a few minutes)') + + self.log.SrcReclog.info( + "src_points before selecting: {}".format(self.src_points.shape) + ) + self.log.SrcReclog.info("processing... (this may take a few minutes)") # store index of src_points as 'src_index' - self.src_points['src_index'] = self.src_points.index + self.src_points["src_index"] = self.src_points.index # add 'lat_group' and 'lon_group' to src_points by module d_deg - self.src_points['lat_group'] = self.src_points['evla'].apply(lambda x: int(x/d_deg)) - self.src_points['lon_group'] = self.src_points['evlo'].apply(lambda x: int(x/d_deg)) + self.src_points["lat_group"] = self.src_points["evla"].apply( + lambda x: int(x / d_deg) + ) + self.src_points["lon_group"] = self.src_points["evlo"].apply( + lambda x: int(x / d_deg) + ) # add 'dep_group' to src_points by module d_km - self.src_points['dep_group'] = self.src_points['evdp'].apply(lambda x: int(x/d_km)) + self.src_points["dep_group"] = self.src_points["evdp"].apply( + lambda x: int(x / d_km) + ) # sort src_points by 'lat_group' and 'lon_group' and 'dep_group' - self.src_points = self.src_points.sort_values(by=['lat_group', 'lon_group', 'dep_group']) + self.src_points = self.src_points.sort_values( + by=["lat_group", "lon_group", "dep_group"] + ) # find all events in the same lat_group and lon_group and dep_group # and keep only on with largest nrec - self.src_points = self.src_points.groupby(['lat_group', 'lon_group', 'dep_group']).apply(lambda x: x.sort_values(by='num_rec', ascending=False).iloc[0]) + self.src_points = self.src_points.groupby( + ["lat_group", "lon_group", "dep_group"] + ).apply(lambda x: x.sort_values(by="num_rec", ascending=False).iloc[0]) # drop 'lat_group' and 'lon_group' and 'dep_group' - self.src_points = self.src_points.drop(columns=['lat_group', 'lon_group', 'dep_group']) + self.src_points = self.src_points.drop( + columns=["lat_group", "lon_group", "dep_group"] + ) # restore index from 'src_index' - self.src_points = self.src_points.set_index('src_index') + self.src_points = self.src_points.set_index("src_index") # sort src_points by index self.src_points = self.src_points.sort_index() - self.log.SrcReclog.info('src_points after selecting: {}'.format(self.src_points.shape)) + self.log.SrcReclog.info( + "src_points after selecting: {}".format(self.src_points.shape) + ) # remove rec_points by new src_points self.remove_rec_by_new_src() @@ -555,17 +814,21 @@ def count_events_per_station(self): count events per station """ # count the number of staname - self.rec_points['num_events'] = self.rec_points.groupby('staname').cumcount() + 1 + self.rec_points["num_events"] = ( + self.rec_points.groupby("staname").cumcount() + 1 + ) # reflect the total number of events for each station - self.rec_points['num_events'] = self.rec_points.groupby('staname')['num_events'].transform('max') + self.rec_points["num_events"] = self.rec_points.groupby("staname")[ + "num_events" + ].transform("max") def _calc_weights(self, lat, lon, scale): points = pd.concat([lon, lat], axis=1) - points_rad =points *(np.pi / 180) - dist = haversine_distances(points_rad)* 6371.0/111.19 - dist_ref = scale*np.mean(dist) - om = np.exp(-(dist/dist_ref)**2)*points.shape[0] - return 1/np.mean(om, axis=0) + points_rad = points * (np.pi / 180) + dist = haversine_distances(points_rad) * 6371.0 / 111.19 + dist_ref = scale * np.mean(dist) + om = np.exp(-((dist / dist_ref) ** 2)) * points.shape[0] + return 1 / np.mean(om, axis=0) def geo_weighting(self, scale=0.5, rec_weight=False): """Calculating geographical 
weights for sources @@ -574,23 +837,23 @@ def geo_weighting(self, scale=0.5, rec_weight=False): :param scale: Scale of reference distance parameter. The reference distance is given by ``scale``* dis_average, defaults to 0.5 :type scale: float, optional """ - - self.src_points['weight'] = self._calc_weights( - self.src_points['evla'], - self.src_points['evlo'], - scale + + self.src_points["weight"] = self._calc_weights( + self.src_points["evla"], self.src_points["evlo"], scale ) if rec_weight: - # self.rec_points['weight'] = self._calc_weights( - # self.src_points['stla'], - # self.src_points['stlo'], - # scale - # ) - pass # TODO: add weights for receivers + weights = self._calc_weights( + self.receivers['stla'], + self.receivers['stlo'], + scale + ) + # apply weights to rec_points + for staname, weight in zip(self.receivers['staname'], weights): + self.rec_points.loc[self.rec_points['staname'] == staname, 'weight'] = weight # # This function is comment out temprarly because it includes verified bug and not modified. # - #def merge_adjacent_stations(self, d_deg:float, d_km:float): + # def merge_adjacent_stations(self, d_deg:float, d_km:float): # """ # merge adjacent stations as one station # d_deg : float @@ -638,7 +901,7 @@ def geo_weighting(self, scale=0.5, rec_weight=False): # # This function is comment out temprarly because it includes verified bug and not modified. # - #def merge_duplicated_station(self): + # def merge_duplicated_station(self): # """ # merge duplicated stations as one station # duplicated stations are defined as stations with the same staname @@ -662,39 +925,47 @@ def geo_weighting(self, scale=0.5, rec_weight=False): # # number of unique stations after merging # print('number of unique stations after merging: ', self.rec_points['staname'].nunique()) - def add_noise(self, range_in_sec=0.1): + def add_noise(self, range_in_sec=0.1, mean_in_sec=0.0, shape="gaussian"): """Add random noise on travel time + :param mean_in_sec: Mean of the noise in sec, defaults to 0.0 + :type mean_in_sec: float, optional :param range_in_sec: Maximun noise in sec, defaults to 0.1 :type range_in_sec: float, optional - """ - noise = np.random.uniform( - low=-range_in_sec, - high=range_in_sec, - size=self.rec_points.shape[0] - ) - self.rec_points['tt'] += noise - - def write_receivers(self, fname:str): + :param shape: Shape of the noise probability distribution, defaults to 'gaussian' + :type shape: str, options: 'gaussian' or 'uniform' + """ + if shape == "uniform": + noise = ( + np.random.uniform( + low=-range_in_sec, high=range_in_sec, size=self.rec_points.shape[0] + ) + + mean_in_sec + ) + elif shape == "gaussian": + noise = np.random.normal( + loc=mean_in_sec, scale=range_in_sec, size=self.rec_points.shape[0] + ) + else: + raise ValueError("shape should be 'gaussian' or 'uniform'") + self.rec_points["tt"] += noise + + def write_receivers(self, fname: str): """ Write receivers to a txt file. :param fname: Path to output txt file of receivers """ - recs = self.rec_points[['staname', 'stla', 'stlo', 'stel', 'weight']].drop_duplicates() - recs.to_csv(fname, sep=' ', header=False, index=False) + self.receivers.to_csv(fname, sep=" ", header=False, index=False) - def write_sources(self, fname:str): + def write_sources(self, fname: str): """ Write sources to a txt file. 
:param fname: Path to output txt file of sources """ - srcs = self.src_points[['event_id', 'evla', 'evlo', 'evdp', 'weight']] - srcs.to_csv(fname, sep=' ', header=False, index=False) + self.sources.to_csv(fname, sep=" ", header=False, index=False) @classmethod - def from_seispy(cls, rf_path:str): + def from_seispy(cls, rf_path: str): """Read and convert source and station information from receiver function data calculated by Seispy @@ -704,7 +975,8 @@ def from_seispy(cls, rf_path:str): :rtype: SrcRec """ from .io.seispy import Seispy - sr = cls('') + + sr = cls("") # Initial an instance of Seispy seispyio = Seispy(rf_path) @@ -716,7 +988,7 @@ def from_seispy(cls, rf_path:str): # Convert to SrcRec format sr.src_points, sr.rec_points = seispyio.to_src_rec_points() - + # update number of receivers sr.update_num_rec() @@ -730,14 +1002,15 @@ def plot(self, weight=False, fname=None): :type weight: bool, optional :param fname: Path to output file, defaults to None :type fname: str, optional - :return: _description_ - :rtype: _type_ + :return: matplotlib figure + :rtype: matplotlib.figure.Figure """ from .vis import plot_srcrec + return plot_srcrec(self, weight=weight, fname=fname) -if __name__ == '__main__': - sr = SrcRec.read('src_rec_file_checker_data_test1.dat_noised_evweighted') +if __name__ == "__main__": + sr = SrcRec.read("src_rec_file_checker_data_test1.dat_noised_evweighted") sr.write() - print(sr.rec_points) \ No newline at end of file + print(sr.rec_points) diff --git a/setup.py b/setup.py index 146ea46..105c5cd 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ long_description = fh.read() -VERSION = "0.1.6" +VERSION = "0.1.8" setup(name='PyTomoATT', version=VERSION, author='Mijian Xu, Masaru Nagaso', diff --git a/test/test_src_rec.py b/test/test_src_rec.py index d8c7c59..98fa403 100644 --- a/test/test_src_rec.py +++ b/test/test_src_rec.py @@ -18,7 +18,13 @@ def test_subcase_02(self): def test_subcase_03(self): sr = SrcRec.read(self.fname) - sr.geo_weighting() + sr.geo_weighting(rec_weight=True) + sr.write('test_src_rec_eg') + + def test_subcase_04(self): + sr = SrcRec.read(self.fname) + sr.add_noise() + sr.add_noise(shape='uniform') if __name__ == '__main__': tsr = TestSrcRec()
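
Usage sketch for the SrcRec API surface touched by this patch (a minimal sketch, not part of the patch itself: the file names, station code, and date strings below are illustrative assumptions):

```python
from pytomoatt.src_rec import SrcRec

# read a src_rec file (dist_in_data and name_net_and_sta keep their False defaults)
sr = SrcRec.read("src_rec_file")

# work on a deep copy so the original object stays untouched (new copy() method)
sr2 = sr.copy()

# new selection helpers added in this patch
sr2.select_by_datetime(["2015-01-01", "2020-12-31"])  # pandas coerces the strings
sr2.select_by_num_rec(5)                 # keep sources with >= 5 receivers
sr2.remove_specified_recs(["NET_STA1"])  # hypothetical station name

# noise now defaults to a Gaussian; uniform noise is kept as an option
sr2.add_noise(range_in_sec=0.1, mean_in_sec=0.0, shape="gaussian")
sr2.add_noise(range_in_sec=0.1, shape="uniform")

# geographical weighting now optionally covers receivers as well
sr2.geo_weighting(scale=0.5, rec_weight=True)

sr2.write("src_rec_file_processed")
sr2.write_receivers("receivers.txt")  # writes the new self.receivers table
sr2.write_sources("sources.txt")      # writes the new self.sources table
```

The rewritten `ATTModel.to_ani` recovers the fast-velocity azimuth `phi` from `xi` and `eta` by quadrant, instead of the previous `acosd` formula that discarded the sign of `eta`. A scalar round-trip check against the checkerboard convention `xi = epsilon*cosd(2*phi)`, `eta = epsilon*sind(2*phi)` (a sketch with local helpers standing in for the `pytomoatt.utils` functions; it avoids `xi == 0`, where the ratio `eta/xi` is undefined):

```python
import numpy as np

def cosd(x): return np.cos(np.radians(x))
def sind(x): return np.sin(np.radians(x))
def atand(x): return np.degrees(np.arctan(x))

epsilon, phi_true = 0.05, 60.0      # anisotropy magnitude and fast azimuth
xi = epsilon * cosd(2 * phi_true)   # -0.025
eta = epsilon * sind(2 * phi_true)  # +0.0433

# quadrant logic mirroring the new to_ani()
if xi <= 0:
    phi = 90 + 0.5 * atand(eta / xi)
elif eta <= 0:
    phi = 180 + 0.5 * atand(eta / xi)
else:
    phi = 0.5 * atand(eta / xi)

print(np.isclose(phi, phi_true))  # True
```

Under the same convention, the checkerboard change from 120/60 deg to 135/45 deg puts the fast axes exactly on the diagonals, so `xi = 0` and the anisotropy is carried entirely by `eta`.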