diff --git a/README.md b/README.md
index c13c485..69a767d 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
# Analysis Platform
```
-2023-11-10: Released version 4.5.0! see RELEASE.md for details.
+2024-01-15: Released version 4.5.1! See RELEASE.md for details.
```
Analysis Platform is an open source web application to import, connect and visualize factory IoT data. It helps to collect, link and integrate data from multiple data sources.
diff --git a/RELEASE.md b/RELEASE.md
index df92776..06a3d30 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,5 +1,32 @@
# Releases
+## v4.5.1
+
+This version is a minor update that includes improvements and bug fixes.
+
+Improvements
+
+* Common
+ * Support a new datetime format: 'dd/mm/YYYY'
+ * Detect serial columns and auto-check them on the Process Config page
+
+* V2 data
+ * Enable importing the 'WorkType, Quality, LotNo, TrayNo' columns
+ * Enable importing files with English (alphabetical) column names
+ * Extract vertical data with the measurement label ('計測値:|measurement.') stripped from item names
+ * Detect the process even when the process name column has no value
+ * Enable selecting/importing duplicated column names
+
+Bug fixes
+
+* Common
+ * Fixed the Process Config page so that the English Name can be modified appropriately
+ * Fixed an issue when importing duplicated column names from CSV data
+ * Fixed an issue on the AgP page where summarized data was wrong due to tz_convert handling of datetime columns
+* V2 data
+ * Fixed importing of abnormal columns
+ * Fixed an issue where 'undefined' values were shown when previewing data from the Process Config page
+
## v4.5.0
Core changes
diff --git a/VERSION b/VERSION
index 018b130..8da8af6 100644
--- a/VERSION
+++ b/VERSION
@@ -1,4 +1,4 @@
-v4.5.0.206.eccc3d8c
+v4.5.1.faa01916
1
OSS
diff --git a/ap/api/aggregate_plot/services.py b/ap/api/aggregate_plot/services.py
index a3979a2..1886466 100644
--- a/ap/api/aggregate_plot/services.py
+++ b/ap/api/aggregate_plot/services.py
@@ -98,7 +98,7 @@ def gen_agp_data(dic_param: DicParam):
graph_param: DicParam
if graph_param.common.divide_format is not None:
- df = convert_utc_to_local_time_and_offset(df, graph_param)
+ # df = convert_utc_to_local_time_and_offset(df, graph_param)
df = gen_divide_format_column(
df, graph_param.common.divide_calendar_dates, graph_param.common.divide_calendar_labels
)
@@ -181,7 +181,7 @@ def gen_divide_format_column(
if df.empty:
return df
df.sort_values(Cycle.time.key, inplace=True)
- dt = df[Cycle.time.key]
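+        # cast the time column to datetime so it can be compared with the UTC calendar boundaries below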
+ dt = pd.to_datetime(df[Cycle.time.key])
divide_calendar_dates = pd.to_datetime(divide_calendar_dates, utc=True)
for i, label in enumerate(divide_calendar_labels):
start_time = divide_calendar_dates[i]
diff --git a/ap/api/heatmap/services.py b/ap/api/heatmap/services.py
index 2bc8333..c257adb 100644
--- a/ap/api/heatmap/services.py
+++ b/ap/api/heatmap/services.py
@@ -45,6 +45,7 @@
NA_STR,
NOT_EXACT_MATCH_FILTER_IDS,
TIME_COL,
+ TIME_COL_LOCAL,
UNIQUE_CATEGORIES,
UNIQUE_SERIAL,
UNMATCHED_FILTER_IDS,
@@ -138,7 +139,7 @@ def get_utc_offset(time_zone):
return time_offset
-def limit_num_cells(df_cells: pd.DataFrame, end_tm, client_tz, limit=10000):
+def limit_num_cells(df_cells: pd.DataFrame, end_tm, limit=10000):
"""Limit number of cells to 10k including empty cells"""
# is_res_limited = df_cells.index.size > limit
@@ -148,7 +149,7 @@ def limit_num_cells(df_cells: pd.DataFrame, end_tm, client_tz, limit=10000):
# update new end_time to 10000 cells
last_cell_time = list(df_cells.tail(1)[TIME_COL])[0]
# end_tm is utc -> convert to local-time
- end_tm_tz = pd.to_datetime(pd.Series([end_tm]), utc=True).dt.tz_convert(client_tz)
+ end_tm_tz = pd.to_datetime(pd.Series([end_tm]), utc=True)
end_tm_tz = list(end_tm_tz)[0]
new_end_time = np.minimum(end_tm_tz, last_cell_time)
new_end_tm = new_end_time.strftime(DATE_FORMAT_QUERY)
@@ -376,21 +377,24 @@ def gen_plotly_data(
@log_execution_time()
@abort_process_handler()
-def gen_agg_col(df: pd.DataFrame, hm_mode, hm_step):
+def gen_agg_col(df: pd.DataFrame, hm_mode, hm_step, client_tz):
"""Aggregate data by time"""
pd_step = convert_to_pandas_step(hm_step, hm_mode)
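+    # keep TIME_COL in UTC and add a client-timezone copy used for all local-time labels below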
+ df[TIME_COL_LOCAL] = pd.to_datetime(df[TIME_COL], utc=True).dt.tz_convert(tz=client_tz)
print(df.index.size)
if hm_mode == 7:
# .astype(str).str[:13] or 16 sometimes doesn't work as expected
- df[AGG_COL] = df[TIME_COL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H')
+ df[AGG_COL] = df[TIME_COL_LOCAL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H')
else:
- df[AGG_COL] = df[TIME_COL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H:%M')
+ df[AGG_COL] = df[TIME_COL_LOCAL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H:%M')
return df
def gen_weekly_ticks(df: pd.DataFrame):
# tick weekly, first day of week, sunday
- df['x_label'] = df[TIME_COL] - (df[TIME_COL].dt.weekday % 7) * np.timedelta64(1, 'D')
+ df['x_label'] = df[TIME_COL_LOCAL] - (df[TIME_COL_LOCAL].dt.weekday % 7) * np.timedelta64(
+ 1, 'D'
+ )
df['x_label'] = (
get_year_week_in_df_column(df['x_label'])
        + '\n'
@@ -404,11 +408,11 @@ def gen_weekly_ticks(df: pd.DataFrame):
def gen_daily_ticks(df: pd.DataFrame):
# tick weekly, first day of week, sunday
df['x_label'] = (
- get_year_week_in_df_column(df[TIME_COL])
+ get_year_week_in_df_column(df[TIME_COL_LOCAL])
        + '\n'
- + df[TIME_COL].dt.month.astype(str).str.pad(2, fillchar='0')
+ + df[TIME_COL_LOCAL].dt.month.astype(str).str.pad(2, fillchar='0')
+ '-'
- + df[TIME_COL].dt.day.astype(str).str.pad(2, fillchar='0')
+ + df[TIME_COL_LOCAL].dt.day.astype(str).str.pad(2, fillchar='0')
)
return df['x_label']
@@ -422,11 +426,6 @@ def get_year_week_in_df_column(column: pd.DataFrame.columns):
)
-def convert_cell_tz(df: pd.DataFrame, offset):
- df[TIME_COL] = df[TIME_COL] + offset
- return df
-
-
@log_execution_time()
@abort_process_handler()
def gen_x_y(df: pd.DataFrame, hm_mode, hm_step, start_tm, end_tm):
@@ -439,18 +438,18 @@ def gen_x_y(df: pd.DataFrame, hm_mode, hm_step, start_tm, end_tm):
if hm_mode == 7:
# gen y
row_per_day = int(24 / hm_step)
- df['dayofweek'] = df[TIME_COL].dt.day_name().astype(str).str[:3]
- df['newdayofweek'] = (16 - df[TIME_COL].dt.dayofweek) % 10 # mon, tue... sat
+ df['dayofweek'] = df[TIME_COL_LOCAL].dt.day_name().astype(str).str[:3]
+ df['newdayofweek'] = (16 - df[TIME_COL_LOCAL].dt.dayofweek) % 10 # mon, tue... sat
df['y'] = (
int(24 / hm_step)
- - (df[TIME_COL].dt.hour / hm_step).astype(int)
+ - (df[TIME_COL_LOCAL].dt.hour / hm_step).astype(int)
+ df['newdayofweek'] * row_per_day
)
# gen x
- df['year'] = df[TIME_COL].dt.year
+ df['year'] = df[TIME_COL_LOCAL].dt.year
min_year = df['year'].min()
- df['x'] = df[TIME_COL].dt.strftime('%U').astype(int) + (df['year'] % min_year) * 53
+ df['x'] = df[TIME_COL_LOCAL].dt.strftime('%U').astype(int) + (df['year'] % min_year) * 53
# x_label
if num_days <= 140:
@@ -458,62 +457,64 @@ def gen_x_y(df: pd.DataFrame, hm_mode, hm_step, start_tm, end_tm):
elif num_days <= 365 * 2:
# tick monthly
df['x_label'] = (
- get_year_week_in_df_column(df[TIME_COL])
+ get_year_week_in_df_column(df[TIME_COL_LOCAL])
                + '\n'
- + df[TIME_COL].dt.month.astype(str).str.pad(2, fillchar='0')
+ + df[TIME_COL_LOCAL].dt.month.astype(str).str.pad(2, fillchar='0')
+ '-01'
)
else:
# tick yearly
-            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n01-01'
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL_LOCAL]) + '\n01-01'
else:
# gen y
num_rows = int(1440 / hm_step)
row_per_hour = 60 / hm_step
- df['dayofweek'] = df[TIME_COL].dt.day_name().astype(str).str[:3]
+ df['dayofweek'] = df[TIME_COL_LOCAL].dt.day_name().astype(str).str[:3]
if hm_step > 60:
df['y'] = num_rows - (
- ((df[TIME_COL].dt.minute + df[TIME_COL].dt.hour * 60) / hm_step).astype(float)
+ ((df[TIME_COL_LOCAL].dt.minute + df[TIME_COL_LOCAL].dt.hour * 60) / hm_step).astype(
+ float
+ )
)
else:
df['y'] = num_rows - (
- (df[TIME_COL].dt.minute / hm_step).astype(int)
- + (df[TIME_COL].dt.hour * row_per_hour).astype(int)
+ (df[TIME_COL_LOCAL].dt.minute / hm_step).astype(int)
+ + (df[TIME_COL_LOCAL].dt.hour * row_per_hour).astype(int)
)
# gen x
- df['year'] = df[TIME_COL].dt.year
+ df['year'] = df[TIME_COL_LOCAL].dt.year
min_year = df['year'].min()
- df['x'] = df[TIME_COL].dt.dayofyear + 366 * (df['year'] % min_year)
+ df['x'] = df[TIME_COL_LOCAL].dt.dayofyear + 366 * (df['year'] % min_year)
# x_label
if num_days <= 21:
# tick daily
df['x_label'] = (
- get_year_week_in_df_column(df[TIME_COL])
+ get_year_week_in_df_column(df[TIME_COL_LOCAL])
                + '\n'
- + df[TIME_COL].dt.date.astype(str).str[5:]
+ + df[TIME_COL_LOCAL].dt.date.astype(str).str[5:]
)
elif num_days <= 140:
df['x_label'] = gen_daily_ticks(df)
elif num_days <= 365 * 2:
# tick monthly
df['x_label'] = (
- get_year_week_in_df_column(df[TIME_COL])
+ get_year_week_in_df_column(df[TIME_COL_LOCAL])
                + '\n'
- + df[TIME_COL].dt.month.astype(str).str.pad(2, fillchar='0')
+ + df[TIME_COL_LOCAL].dt.month.astype(str).str.pad(2, fillchar='0')
+ '-01'
)
else:
# tick yearly
-            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n01-01'
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL_LOCAL]) + '\n01-01'
time_fmt = '%Y-%m-%d %a %H:%M'
-    df['from'] = 'From: ' + df[TIME_COL].dt.strftime(time_fmt) + '\n'
+    df['from'] = 'From: ' + df[TIME_COL_LOCAL].dt.strftime(time_fmt) + '\n'
unit = 'min' if hm_mode == 1 else 'h'
- df['to_temp'] = df[TIME_COL] + pd.to_timedelta(hm_step, unit=unit)
+ df['to_temp'] = df[TIME_COL_LOCAL] + pd.to_timedelta(hm_step, unit=unit)
df.loc[df['to_temp'].astype(str).str[11:16] == '00:00', 'to'] = (
- df['to_temp'].astype(str).str[:8] + df[TIME_COL].dt.strftime('%d %a ') + '24:00'
+ df['to_temp'].astype(str).str[:8] + df[TIME_COL_LOCAL].dt.strftime('%d %a ') + '24:00'
)
df.loc[df['to_temp'].astype(str).str[11:16] != '00:00', 'to'] = df['to_temp'].dt.strftime(
time_fmt
@@ -674,16 +675,26 @@ def convert_to_pandas_step(hm_step, hm_mode):
@log_execution_time()
@abort_process_handler()
-def create_agg_column(df, pd_step='4h', agg_col=AGG_COL, hm_mode=7, client_tz=tz.tzutc()):
+def create_agg_column(df, agg_col=AGG_COL, hm_mode=7, hm_step=4, df_cells=None):
"""Create aggregate column data"""
- if hm_mode == 7:
- length = 13
- else:
- length = 16
- temp = pd.to_datetime(df[TIME_COL], format='%Y-%m-%dT%H:%M', utc=True).dt.tz_convert(
- tz=client_tz
+ dt = pd.to_datetime(df[TIME_COL], format='%Y-%m-%dT%H:%M', utc=True)
+ df[agg_col] = None
+    # build cell boundaries: each label covers [cell_start, next_cell_start), so append one extra boundary after the last cell
+ group_list = df_cells[TIME_COL].tolist()
+ next_cell = (
+ group_list[-1] + pd.Timedelta(minutes=hm_step)
+ if hm_mode == 1
+ else group_list[-1] + pd.Timedelta(hours=hm_step)
)
- df[agg_col] = temp.dt.floor(pd_step).astype(str).str[:length]
+ group_list.append(next_cell)
+ group_list = pd.to_datetime(group_list, format='%Y-%m-%dT%H:%M', utc=True)
+
+ labels = df_cells[AGG_COL].tolist()
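+    # use binary search on the sorted timestamps to assign each row its cell label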
+ for i, label in enumerate(labels):
+ start_time = group_list[i]
+ end_time = group_list[i + 1]
+ start_index, end_index = dt.searchsorted([start_time, end_time])
+ df[start_index:end_index][agg_col] = label
return df
@@ -819,14 +830,13 @@ def gen_heatmap_data_as_dict(
df_cells = pd.DataFrame({TIME_COL: cells})
# time_delta = calc_time_delta(hm_mode, hm_step, start_tm)
if not df_cells.empty:
- df_cells[TIME_COL] = pd.to_datetime(df_cells[TIME_COL], utc=True).dt.tz_convert(
- tz=client_tz
- )
- df_cells = gen_agg_col(df_cells, hm_mode, hm_step)
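+        # keep the cell boundaries in UTC; gen_agg_col now derives the client-timezone column itself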
+ df_cells[TIME_COL] = pd.to_datetime(df_cells[TIME_COL], utc=True)
+
+ df_cells = gen_agg_col(df_cells, hm_mode, hm_step, client_tz)
# limit to 10000 cells
dic_param.update({ACT_CELLS: df_cells.index.size})
- df_cells, end_tm = limit_num_cells(df_cells, end_tm, client_tz)
+ df_cells, end_tm = limit_num_cells(df_cells, end_tm)
# generate x, y, x_label, y_label
df_cells = gen_x_y(df_cells, hm_mode, hm_step, start_tm, end_tm)
@@ -872,8 +882,7 @@ def gen_heatmap_data_as_dict(
dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
# gen aggregate end col
- pd_step = convert_to_pandas_step(hm_step, hm_mode)
- df: pd.DataFrame = create_agg_column(df, pd_step, AGG_COL, hm_mode, client_tz)
+ df: pd.DataFrame = create_agg_column(df, AGG_COL, hm_mode, hm_step, df_cells)
agg_cols = gen_agg_col_names(var_agg_cols) # move
dic_df_proc = {}
diff --git a/ap/api/setting_module/services/autolink.py b/ap/api/setting_module/services/autolink.py
index 889e112..9d57d64 100644
--- a/ap/api/setting_module/services/autolink.py
+++ b/ap/api/setting_module/services/autolink.py
@@ -19,6 +19,7 @@
from ap.common.common_utils import detect_encoding, get_csv_delimiter, get_latest_files
from ap.common.constants import (
DF_CHUNK_SIZE,
+ DUMMY_V2_PROCESS_NAME,
MAXIMUM_PROCESSES_ORDER_FILES,
REVERSED_WELL_KNOWN_COLUMNS,
DataGroupType,
@@ -165,12 +166,12 @@ def drop_duplicates(df: DataFrame) -> DataFrame:
@log_execution_time(LOG_PREFIX)
def __read_v2(self, file: Union[Path, str], processes: List[str], ids: List[int]):
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(file)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_file(file)
if datasource_type not in [DBType.V2, DBType.V2_MULTI, DBType.V2_HISTORY]:
return
process_col = get_reversed_column_value_from_v2(
- datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2
+ datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2, is_en_cols
)
serial_col = get_reversed_column_value_from_v2(
datasource_type.name, DataGroupType.DATA_SERIAL.value, is_abnormal_v2
@@ -202,6 +203,8 @@ def __read_v2(self, file: Union[Path, str], processes: List[str], ids: List[int]
file, chunksize=DF_CHUNK_SIZE, nrows=AUTOLINK_TOTAL_RECORDS_PER_SOURCE, **params
) as reader:
for df_chunk in reader:
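+                # when the dummy process is selected, rows with an empty process name belong to it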
+ if DUMMY_V2_PROCESS_NAME in mapping_processes_id:
+ df_chunk[process_col] = df_chunk[process_col].fillna(DUMMY_V2_PROCESS_NAME)
df_processes = df_chunk[df_chunk[process_col].isin(mapping_processes_id)]
df_processes = df_processes.rename(columns=rename_params)
@@ -226,6 +229,8 @@ def __read_v2(self, file: Union[Path, str], processes: List[str], ids: List[int]
**params,
) as reader:
for df_chunk in reader:
+ if DUMMY_V2_PROCESS_NAME in mapping_processes_id:
+ df_chunk[process_col] = df_chunk[process_col].fillna(DUMMY_V2_PROCESS_NAME)
df_processes = df_chunk[df_chunk[process_col].isin(mapping_processes_id)]
df_processes = df_processes.rename(columns=rename_params)
diff --git a/ap/api/setting_module/services/csv_import.py b/ap/api/setting_module/services/csv_import.py
index 29a1267..7b5f26f 100644
--- a/ap/api/setting_module/services/csv_import.py
+++ b/ap/api/setting_module/services/csv_import.py
@@ -62,7 +62,10 @@
from ap.common.logger import log_execution_time
from ap.common.scheduler import JobType, scheduler_app_context
from ap.common.services.csv_content import is_normal_csv, read_data
-from ap.common.services.csv_header_wrapr import add_suffix_if_duplicated
+from ap.common.services.csv_header_wrapr import (
+ add_suffix_if_duplicated,
+ transform_duplicated_col_suffix_to_pandas_col,
+)
from ap.common.services.normalization import normalize_list, normalize_str
from ap.common.timezone_utils import (
add_days_from_utc,
@@ -245,6 +248,8 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
# check missing columns
if is_abnormal is False:
+ dic_csv_cols = None
+ dic_org_csv_cols = None
csv_cols = headers
# in case if v2, assume that there is not missing columns from v2 files
if not is_v2_datasource:
@@ -256,9 +261,13 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
delimiter=transformed_file_delimiter,
do_normalize=False,
)
- csv_cols = next(check_file)
- csv_cols = normalize_list(csv_cols)
- csv_cols = add_suffix_if_duplicated(csv_cols, True)
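+            # keep both the normalized and the original headers so duplicated names can be mapped to pandas-style usecols later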
+ org_csv_cols = next(check_file)
+ csv_cols = normalize_list(org_csv_cols)
+ csv_cols, with_dupl_cols = add_suffix_if_duplicated(csv_cols, True)
+ dic_csv_cols = dict(zip(csv_cols, with_dupl_cols))
+            # add the same suffixes to the original csv columns
+ org_csv_cols, _ = add_suffix_if_duplicated(org_csv_cols, True)
+ dic_org_csv_cols = dict(zip(csv_cols, org_csv_cols))
check_file.close()
# missing_cols = set(dic_use_cols).difference(csv_cols)
@@ -266,7 +275,13 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
valid_columns = list(set(dic_use_cols).intersection(csv_cols))
# re-arrange cols
valid_columns = [col for col in csv_cols if col in valid_columns]
+ dic_valid_csv_cols = dict(zip(valid_columns, [False] * len(valid_columns)))
missing_cols = [] if len(valid_columns) else dic_use_cols
+
+ if not is_v2_datasource:
+ valid_with_dupl_cols = [dic_csv_cols[col] for col in valid_columns]
+ dic_valid_csv_cols = dict(zip(valid_columns, valid_with_dupl_cols))
+
if DATETIME_DUMMY in missing_cols:
# remove dummy col before check
missing_cols.remove(DATETIME_DUMMY)
@@ -307,7 +322,10 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
continue
# default_csv_param['usecols'] = [i for i, col in enumerate(valid_columns) if col]
- default_csv_param['usecols'] = valid_columns
+ default_csv_param['usecols'] = transform_duplicated_col_suffix_to_pandas_col(
+ dic_valid_csv_cols,
+ dic_org_csv_cols,
+ )
use_col_names = [col for col in valid_columns if col]
# read csv file
@@ -318,7 +336,9 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
}
if is_v2_datasource:
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(transformed_file)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_file(
+ transformed_file
+ )
if datasource_type == DBType.V2_HISTORY:
df_one_file = get_df_v2_process_single_file(
transformed_file,
@@ -332,6 +352,7 @@ def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT, is_user_request=Non
process_name=data_src.process_name,
datasource_type=datasource_type,
is_abnormal_v2=is_abnormal_v2,
+ is_en_cols=is_en_cols,
)
else:
continue
diff --git a/ap/api/setting_module/services/show_latest_record.py b/ap/api/setting_module/services/show_latest_record.py
index 301688a..a70ee5c 100644
--- a/ap/api/setting_module/services/show_latest_record.py
+++ b/ap/api/setting_module/services/show_latest_record.py
@@ -110,7 +110,7 @@ def get_latest_records(data_source_id, table_name, limit):
return_df=True,
max_records=1000,
)
- headers = dic_preview.get('header')
+ headers = normalize_list(dic_preview.get('header'))
data_types = dic_preview.get('dataType')
same_values = dic_preview.get('same_values')
is_v2_history = dic_preview.get('v2_type') == DBType.V2_HISTORY
@@ -118,8 +118,9 @@ def get_latest_records(data_source_id, table_name, limit):
cols_with_types = gen_cols_with_types(headers, data_types, same_values, is_v2_history)
# sort columns
- sorted_columns = sorted(csv_detail.csv_columns, key=lambda c: c.order or c.id)
- cols = [col.column_name for col in sorted_columns if col.column_name in headers]
+ sorted(csv_detail.csv_columns, key=lambda c: c.order or c.id)
+ # cols = {col.column_name for col in sorted_columns if col.column_name in headers}
+ cols = headers
# get rows
df_rows = dic_preview.get('content', None)
@@ -495,7 +496,9 @@ def preview_v2_data(folder_url, csv_delimiter, limit, return_df=False, process_n
file_data_idx = 0
while file_data_idx >= 0:
largest_file = sorted_files[file_data_idx]
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(largest_file)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_file(
+ largest_file
+ )
if datasource_type == DBType.V2_HISTORY:
data_details = get_df_v2_process_single_file(
@@ -503,7 +506,7 @@ def preview_v2_data(folder_url, csv_delimiter, limit, return_df=False, process_n
)
elif datasource_type in [DBType.V2, DBType.V2_MULTI]:
data_details = get_vertical_df_v2_process_single_file(
- largest_file, process_name, datasource_type, is_abnormal_v2
+ largest_file, process_name, datasource_type, is_abnormal_v2, is_en_cols
)
else:
raise NotImplementedError
@@ -550,7 +553,7 @@ def preview_v2_data(folder_url, csv_delimiter, limit, return_df=False, process_n
# get DB Type and check if there is abnormal history
if is_abnormal_v2 is None and not datasource_type:
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(csv_file)
+ datasource_type, is_abnormal_v2, _ = get_v2_datasource_type_from_file(csv_file)
header_names = rename_abnormal_history_col_names(datasource_type, header_names, is_abnormal_v2)
org_headers, header_names, dupl_cols = gen_colsname_for_duplicated(header_names)
df_data_details = normalize_big_rows(data_details, header_names)
@@ -626,6 +629,7 @@ def preview_v2_data(folder_url, csv_delimiter, limit, return_df=False, process_n
'has_dupl_cols': has_dupl_cols,
'org_headers': org_headers,
'v2_type': datasource_type.value,
+ 'is_process_null': not v2_process_names,
}
@@ -744,6 +748,7 @@ def convert_utc_df(df_rows, cols, data_types, data_source, table_name):
def transform_df_to_rows(cols, df_rows, limit):
+ df_rows.columns = normalize_list(df_rows.columns)
return [
dict(zip(cols, vals)) for vals in df_rows[0:limit][cols].to_records(index=False).tolist()
]
@@ -768,7 +773,7 @@ def gen_preview_data_check_dict(rows, previewed_files):
@log_execution_time()
def gen_colsname_for_duplicated(cols_name):
org_cols_name = cols_name.copy()
- cols_name, dup_cols = chw.add_suffix_if_duplicated(cols_name, True, True)
+ cols_name, dup_cols = chw.add_suffix_if_duplicated(cols_name, True)
return org_cols_name, cols_name, dup_cols
diff --git a/ap/api/setting_module/services/v2_etl_services.py b/ap/api/setting_module/services/v2_etl_services.py
index 9621579..05cd01d 100644
--- a/ap/api/setting_module/services/v2_etl_services.py
+++ b/ap/api/setting_module/services/v2_etl_services.py
@@ -2,7 +2,7 @@
import logging
import re
from pathlib import Path
-from typing import List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
@@ -16,11 +16,14 @@
ABNORMAL_V2_COLS,
ABNORMAL_WELL_KNOWN_COLUMNS,
DF_CHUNK_SIZE,
+ DUMMY_V2_PROCESS_NAME,
REVERSED_WELL_KNOWN_COLUMNS,
+ REVERSED_WELL_KNOWN_EN_COLUMNS,
SUB_PART_NO_DEFAULT_NO,
SUB_PART_NO_DEFAULT_SUFFIX,
SUB_PART_NO_NAMES,
WELL_KNOWN_COLUMNS,
+ WELL_KNOWN_EN_COLUMNS,
DataGroupType,
DataType,
DBType,
@@ -29,6 +32,7 @@
from ap.common.logger import log_execution_time
from ap.common.memoize import memoize
from ap.common.services.csv_content import gen_data_types, get_metadata
+from ap.common.services.csv_header_wrapr import add_suffix_if_duplicated
from ap.common.services.jp_to_romaji_utils import to_romaji
from ap.common.services.normalization import normalize_str
from ap.setting_module.models import (
@@ -124,14 +128,14 @@ def get_preview_processes_v2(
found_processes = set()
sorted_files = sorted_files[:maximum_files]
for f_name in sorted_files:
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(f_name)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_file(f_name)
if datasource_type not in [DBType.V2, DBType.V2_MULTI, DBType.V2_HISTORY]:
continue
process_col_name = get_reversed_column_value_from_v2(
- datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2
+ datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2, is_en_cols
)
- params = build_read_csv_for_v2(f_name, datasource_type, is_abnormal_v2)
+ params = build_read_csv_for_v2(f_name, datasource_type, is_abnormal_v2, is_en_cols)
# we only use process_col_name
params.update(usecols=[process_col_name])
try:
@@ -155,25 +159,32 @@ def get_preview_processes_v2(
@log_execution_time()
@memoize()
def get_df_v2_process_single_file(
- v2_file: str, process_name: str, datasource_type=None, is_abnormal_v2=False
+ v2_file: str,
+ process_name: str,
+ datasource_type=None,
+ is_abnormal_v2=False,
+ is_en_cols=False,
) -> DataFrame:
df = pd.DataFrame()
if not datasource_type:
- datasource_type, is_abnormal_v2 = get_v2_datasource_type_from_file(v2_file)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_file(v2_file)
assert datasource_type in [
DBType.V2,
DBType.V2_MULTI,
DBType.V2_HISTORY,
], 'We only need to get process from v2'
process_col_name = get_reversed_column_value_from_v2(
- datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2
+ datasource_type.name, DataGroupType.PROCESS_NAME.value, is_abnormal_v2, is_en_cols
)
- params = build_read_csv_for_v2(v2_file, datasource_type, is_abnormal_v2)
+ params = build_read_csv_for_v2(v2_file, datasource_type, is_abnormal_v2, is_en_cols)
try:
with pd.read_csv(v2_file, chunksize=DF_CHUNK_SIZE, **params) as reader:
for df_chunk in reader:
- df_process = df_chunk[df_chunk[process_col_name] == process_name]
+ if process_name == DUMMY_V2_PROCESS_NAME:
+ df_process = df_chunk[df_chunk[process_col_name].isna()]
+ else:
+ df_process = df_chunk[df_chunk[process_col_name] == process_name]
df_process = df_process.drop_duplicates()
df = pd.concat([df, df_process])
df = df.drop_duplicates()
@@ -182,7 +193,10 @@ def get_df_v2_process_single_file(
v2_file, chunksize=DF_CHUNK_SIZE, quoting=csv.QUOTE_NONE, **params
) as reader:
for df_chunk in reader:
- df_process = df_chunk[df_chunk[process_col_name] == process_name]
+ if process_name == DUMMY_V2_PROCESS_NAME:
+ df_process = df_chunk[df_chunk[process_col_name].isna()]
+ else:
+ df_process = df_chunk[df_chunk[process_col_name] == process_name]
df_process = df_process.drop_duplicates()
df = pd.concat([df, df_process])
df = df.drop_duplicates()
@@ -191,7 +205,7 @@ def get_df_v2_process_single_file(
pass
# rename abnormal history name
- if is_abnormal_v2:
+ if is_abnormal_v2 and datasource_type == DBType.V2_HISTORY:
df = rename_abnormal_history_col_names_from_df(df, datasource_type)
return df
@@ -208,40 +222,55 @@ def get_df_v2_process_multiple_files(v2_files: List[str], process_name: str) ->
@log_execution_time()
-def simple_convert_to_v2_vertical(df: DataFrame, datasource_type=None) -> DataFrame:
+def simple_convert_to_v2_vertical(
+ df: DataFrame, datasource_type=None, is_abnormal_v2=False, is_en_cols=False
+) -> DataFrame:
if not datasource_type:
- datasource_type, _ = get_v2_datasource_type_from_df(df)
+ datasource_type, is_abnormal_v2, is_en_cols = get_v2_datasource_type_from_df(df)
assert datasource_type in [
DBType.V2,
DBType.V2_MULTI,
], 'We only need to convert vertical from v2 and v2_multi'
- # TODO: the logic isn't the same as bridge, add more conditions later
- all_columns = WELL_KNOWN_COLUMNS[datasource_type.name].keys()
- quality_id_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
- DataGroupType.QUALITY_ID.value
- )
- quality_name_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
- DataGroupType.QUALITY_NAME.value
- )
- data_value_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
- DataGroupType.DATA_VALUE.value
+ (
+ all_columns,
+ quality_id_col,
+ quality_name_col,
+ data_value_col,
+ ) = get_quality_and_data_value_from_v2(
+ datasource_type,
+ is_abnormal_v2,
+ is_en_cols,
)
+
+ quality_name_like_cols = []
+ data_value_like_cols = []
+ normalized_cols = df.columns
unique_cols = [
col for col in all_columns if col not in [quality_name_col, data_value_col, quality_id_col]
]
+ if is_en_cols:
+ normalized_cols = normalize_column_name(normalized_cols)
- quality_name_like_cols = [col for col in df.columns if col.startswith(quality_name_col)]
-
- data_value_like_cols = []
- for col in df.columns:
+ dict_normalized_cols = dict(zip(normalized_cols, df.columns))
+ for normalized_col, col in zip(normalized_cols, df.columns):
# pandas will add suffix '.' to duplicated columns
- if col == data_value_col or col.startswith(f'{data_value_col}.'):
+ if normalized_col == data_value_col or normalized_col.startswith(
+ (f'{data_value_col}.', f'{data_value_col}_')
+ ):
data_value_like_cols.append(col)
+ if normalized_col.startswith(quality_name_col):
+ quality_name_like_cols.append(col)
+
assert len(quality_name_like_cols) == len(data_value_like_cols)
assert len(quality_name_like_cols) > 0
+ # get original columns name
+ quality_name_col = dict_normalized_cols[quality_name_col]
+ data_value_col = dict_normalized_cols[data_value_col]
+ unique_cols = [dict_normalized_cols[col] for col in unique_cols]
+
# we don't need to melt if we don't have multiple quality_name + data_value columns
if len(quality_name_like_cols) > 1:
# TODO: use temporary column from constant
@@ -249,6 +278,7 @@ def simple_convert_to_v2_vertical(df: DataFrame, datasource_type=None) -> DataFr
temp_id_column = '__id__'
stub_columns_to_be_converted = [quality_name_col, data_value_col]
+
# add ".0" since the first columns does not have this suffix
mapping = {col: f'{col}.0' for col in stub_columns_to_be_converted}
df = df.rename(columns=mapping)
@@ -270,16 +300,39 @@ def simple_convert_to_v2_vertical(df: DataFrame, datasource_type=None) -> DataFr
df = df.drop_duplicates(subset=unique_cols + [quality_name_col])
df = df.dropna(subset=[quality_name_col])
+    # normalize measurement item names and strip the '計測値:' / 'measurement.' label; these become the pivoted column names
+ unique_vertical_cols = df[quality_name_col].unique().tolist()
+ normalized_vertical_cols = (
+ pd.Series(unique_vertical_cols)
+ .apply(normalize_str)
+ .replace(r'計測値:|measurement.', '', regex=True)
+ )
+
df = df.pivot(index=unique_cols, columns=quality_name_col, values=data_value_col)
+ # find duplicate quality columns, compare with index of df
+ duplicated_cols = list(set(df.columns).intersection(unique_cols))
+ filter_cols = unique_cols + list(normalized_vertical_cols)
+ if len(duplicated_cols) or len(filter_cols):
+ df_columns, is_duplicated = add_suffix_if_duplicated(filter_cols, skip_zero=True)
+ # rename columns if there is duplicated measure item name
+ if True in is_duplicated:
+ normalized_vertical_cols = df_columns[len(unique_cols) :]
+ df.rename(columns=dict(zip(unique_vertical_cols, normalized_vertical_cols)), inplace=True)
return df.reset_index()
@log_execution_time()
def get_vertical_df_v2_process_single_file(
- file: str, process_name: str, datasource_type=None, is_abnormal_v2=False
+ file: str,
+ process_name: str,
+ datasource_type=None,
+ is_abnormal_v2=False,
+ is_en_cols=False,
) -> DataFrame:
- df = get_df_v2_process_single_file(file, process_name, datasource_type, is_abnormal_v2)
- df = simple_convert_to_v2_vertical(df, datasource_type)
+ df = get_df_v2_process_single_file(
+ file, process_name, datasource_type, is_abnormal_v2, is_en_cols
+ )
+ df = simple_convert_to_v2_vertical(df, datasource_type, is_abnormal_v2, is_en_cols)
return df
@@ -292,7 +345,7 @@ def get_vertical_df_v2_process_multiple_files(v2_files: List[str], process_name:
@log_execution_time()
def build_read_csv_for_v2(
- file_path: str, datasource_type: DBType = DBType.V2, is_abnormal_v2=False
+ file_path: str, datasource_type: DBType = DBType.V2, is_abnormal_v2=False, is_en_cols=False
):
from ap.api.setting_module.services.data_import import NA_VALUES
@@ -305,11 +358,21 @@ def build_read_csv_for_v2(
must_get_columns = tuple(WELL_KNOWN_COLUMNS[datasource_type.name].keys())
if is_abnormal_v2:
must_get_columns = tuple(ABNORMAL_WELL_KNOWN_COLUMNS[datasource_type.name].keys())
- usecols = lambda x: x.startswith(must_get_columns)
+
+ if is_en_cols:
+ must_get_columns = tuple(WELL_KNOWN_EN_COLUMNS[datasource_type.name].keys())
+
+ def usecols(x):
+ return x.startswith(must_get_columns)
+
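+    # English V2 headers are matched after normalization (lowercase, separators replaced by '_')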
+ def usecols_with_normalization(x):
+ [normalized_x] = normalize_column_name([x])
+ return normalized_x.startswith(must_get_columns)
+
dtype = 'str'
params.update(
dict(
- usecols=usecols,
+ usecols=usecols if not is_en_cols else usecols_with_normalization,
skipinitialspace=True,
na_values=NA_VALUES,
error_bad_lines=False,
@@ -371,7 +434,7 @@ def rename_sub_part_no(df: pd.DataFrame, datasource_type=None) -> Tuple[DataFram
part_no_columns.append(partno)
if not datasource_type:
- datasource_type, _ = get_v2_datasource_type_from_df(df)
+ datasource_type, *_ = get_v2_datasource_type_from_df(df)
if datasource_type != DBType.V2_HISTORY:
# v2 measure
@@ -435,25 +498,39 @@ def find_remaining_columns(process_id, all_columns):
return [col for col in all_columns if col not in used_columns]
-def get_v2_datasource_type_from_file(v2_file: Union[Path, str]) -> Optional[DBType]:
+def get_v2_datasource_type_from_file(v2_file: Union[Path, str]) -> tuple[Any, Any, Any]:
"""Check if this file is v2, v2 multi or v2 history"""
df = pd.read_csv(v2_file, nrows=1)
- return get_v2_datasource_type_from_df(df)
+ datasource_type, is_abnormal, is_en_cols = get_v2_datasource_type_from_df(df)
+ return datasource_type, is_abnormal, is_en_cols
-def get_v2_datasource_type_from_df(df: DataFrame) -> Tuple[Optional[DBType], bool]:
+def get_v2_datasource_type_from_df(
+ df: DataFrame,
+) -> Union[tuple[DBType, bool, bool], tuple[None, bool, bool]]:
columns = set(col.strip() for col in df.columns)
is_abnormal = False
+ is_en_cols = False
for datasource_type in [DBType.V2_HISTORY, DBType.V2, DBType.V2_MULTI]:
must_exist_columns = set(WELL_KNOWN_COLUMNS[datasource_type.name].keys())
abnormal_must_exist_columns = set(ABNORMAL_WELL_KNOWN_COLUMNS[datasource_type.name].keys())
+ en_must_exist_columns = (
+ set(WELL_KNOWN_EN_COLUMNS[datasource_type.name].keys())
+ if datasource_type.name in WELL_KNOWN_EN_COLUMNS
+ else ()
+ )
if columns >= must_exist_columns:
- return datasource_type, is_abnormal
+ return datasource_type, is_abnormal, is_en_cols
if columns >= abnormal_must_exist_columns:
is_abnormal = True
- return datasource_type, is_abnormal
- return None, is_abnormal
+ return datasource_type, is_abnormal, is_en_cols
+
+ if len(en_must_exist_columns):
+ normalize_cols = set(normalize_column_name(columns))
+ if normalize_cols >= en_must_exist_columns:
+ return datasource_type, is_abnormal, True
+ return None, is_abnormal, is_en_cols
@log_execution_time()
@@ -483,7 +560,7 @@ def prepare_to_import_v2_df(
:return: transformed dataframe, has_new_columns
"""
if not datasource_type:
- datasource_type, _ = get_v2_datasource_type_from_df(df)
+ datasource_type, *_ = get_v2_datasource_type_from_df(df)
col_names = {col: normalize_str(col) for col in df.columns}
df = df.rename(columns=col_names)
@@ -497,13 +574,18 @@ def prepare_to_import_v2_df(
return df, has_remaining_cols
-def get_reversed_column_value_from_v2(datasource_type, reversed_column_name, is_abnormal_v2):
+def get_reversed_column_value_from_v2(
+ datasource_type, reversed_column_name, is_abnormal_v2, is_en_cols=False
+):
"""
:return: v2 normal column name
"""
if is_abnormal_v2:
return ABNORMAL_REVERSED_WELL_KNOWN_COLUMNS[datasource_type][reversed_column_name]
+ if is_en_cols:
+ return REVERSED_WELL_KNOWN_EN_COLUMNS[datasource_type][reversed_column_name]
+
return REVERSED_WELL_KNOWN_COLUMNS[datasource_type][reversed_column_name]
@@ -539,3 +621,62 @@ def rename_abnormal_history_col_names_from_df(df, datasource_type):
if len(rename_headers.keys()):
df.rename(columns=rename_headers, inplace=True)
return df
+
+
+def normalize_column_name(columns_name):
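+    # lowercase headers and replace separators so English V2 columns match WELL_KNOWN_EN_COLUMNS keys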
+    # symbols that are converted to underscores during normalization
+ convert_symbols = ['.', '/', ' ', '-']
+ normalize_cols = []
+ for column_name in columns_name:
+ col_name = column_name.lower()
+ for symbol in convert_symbols:
+ col_name = col_name.replace(symbol, '_')
+ if col_name[-1] == '_':
+ # remove last underscore of column name
+ # eg. serial_no_ -> serial_no
+ col_name = col_name[:-1]
+ normalize_cols.append(col_name)
+ return normalize_cols
+
+
+def get_quality_and_data_value_from_v2(datasource_type, is_abnormal_v2=False, is_en_cols=False):
+ """
+    :return: all well-known columns plus the quality id, quality name and data value column names
+ """
+ all_columns = WELL_KNOWN_COLUMNS[datasource_type.name].keys()
+ quality_id_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_ID.value
+ )
+ quality_name_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_NAME.value
+ )
+ data_value_col = REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.DATA_VALUE.value
+ )
+
+    # for V2 files whose columns are in English
+ if is_en_cols:
+ all_columns = WELL_KNOWN_EN_COLUMNS[datasource_type.name].keys()
+ quality_id_col = REVERSED_WELL_KNOWN_EN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_ID.value
+ )
+ quality_name_col = REVERSED_WELL_KNOWN_EN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_NAME.value
+ )
+ data_value_col = REVERSED_WELL_KNOWN_EN_COLUMNS[datasource_type.name].get(
+ DataGroupType.DATA_VALUE.value
+ )
+
+ if is_abnormal_v2:
+ all_columns = ABNORMAL_WELL_KNOWN_COLUMNS[datasource_type.name].keys()
+ quality_id_col = ABNORMAL_REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_ID.value
+ )
+ quality_name_col = ABNORMAL_REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.QUALITY_NAME.value,
+ )
+ data_value_col = ABNORMAL_REVERSED_WELL_KNOWN_COLUMNS[datasource_type.name].get(
+ DataGroupType.DATA_VALUE.value
+ )
+
+ return all_columns, quality_id_col, quality_name_col, data_value_col
diff --git a/ap/common/constants.py b/ap/common/constants.py
index d80e7cf..585cd51 100644
--- a/ap/common/constants.py
+++ b/ap/common/constants.py
@@ -60,6 +60,8 @@
AS_HEATMAP_MATRIX = 'as_heatmap_matrix'
HEATMAP_MATRIX = 'heatmap_matrix'
+DUMMY_V2_PROCESS_NAME = 'DUMMY_V2_PROCESS_NAME'
+
class ApLogLevel(Enum):
DEBUG = auto()
@@ -371,6 +373,7 @@ class ErrorMsg(Enum):
CELL_SUFFIX = '_cell'
AGG_COL = 'agg_col'
TIME_COL = 'time'
+TIME_COL_LOCAL = 'time_local'
REQUEST_THREAD_ID = 'thread_id'
SERIALS = 'serials'
@@ -940,29 +943,35 @@ class DataGroupType(BaseEnum):
SUB_LOT_NO = 15
SUB_TRAY_NO = 16
SUB_SERIAL = 17
- # generate equation
- Femto_Date = 18
- Femto_Mach = 19
- Femto_Order = 20
- Line = 21
- Datetime = 22
- Milling = 23
-
- FACTORY_ID = 24
- FACTORY_NAME = 25
- PLANT_ID = 26
- PLANT_NO = 27
- DEPT_ID = 28
- DEPT_NAME = 29
- LINE_GROUP_ID = 30
- LINE_GROUP_NAME = 31
- PART_FULL = 32
- EQUIP_ID = 33 # TODO CHECK
- HORIZONTAL_DATA = 34 # Type for horizontal columns that are sensor columns
-
- # PART_LOG
- FORGING_DATE = 35
- DELIVERY_ASSY_FASTEN_TORQUE = 36
+
+    # newly supported V2 column types
+ WORK_TYPE = 18
+ QUALITY = 19
+ LOT_NO = 20
+ TRAY_NO = 21
+ # # generate equation
+ # Femto_Date = 18
+ # Femto_Mach = 19
+ # Femto_Order = 20
+ # Line = 21
+ # Datetime = 22
+ # Milling = 23
+ #
+ # FACTORY_ID = 24
+ # FACTORY_NAME = 25
+ # PLANT_ID = 26
+ # PLANT_NO = 27
+ # DEPT_ID = 28
+ # DEPT_NAME = 29
+ # LINE_GROUP_ID = 30
+ # LINE_GROUP_NAME = 31
+ # PART_FULL = 32
+ # EQUIP_ID = 33 # TODO CHECK
+ # HORIZONTAL_DATA = 34 # Type for horizontal columns that are sensor columns
+ #
+ # # PART_LOG
+ # FORGING_DATE = 35
+ # DELIVERY_ASSY_FASTEN_TORQUE = 36
# PRODUCT_ID = 35
@@ -1001,6 +1010,9 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備名': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ 'ロットNo': DataGroupType.LOT_NO.value,
+ 'トレイNo': DataGroupType.TRAY_NO.value,
'シリアルNo': DataGroupType.DATA_SERIAL.value,
'計測日時': DataGroupType.DATA_TIME.value,
'計測項目ID': DataGroupType.QUALITY_ID.value,
@@ -1015,6 +1027,10 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ '良否': DataGroupType.QUALITY.value,
+ 'ロットNo': DataGroupType.LOT_NO.value,
+ 'トレイNo': DataGroupType.TRAY_NO.value,
'シリアルNo': DataGroupType.DATA_SERIAL.value,
'加工日時': DataGroupType.DATA_TIME.value,
'測定項目名': DataGroupType.QUALITY_NAME.value,
@@ -1028,6 +1044,9 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ 'ロットNo': DataGroupType.LOT_NO.value,
+ 'トレイNo': DataGroupType.TRAY_NO.value,
'シリアルNo': DataGroupType.DATA_SERIAL.value,
'加工日時': DataGroupType.DATA_TIME.value,
'子部品品番': DataGroupType.SUB_PART_NO.value,
@@ -1046,6 +1065,9 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_ID.value: '子設備ID',
DataGroupType.MACHINE_NAME.value: '子設備名',
DataGroupType.PART_NO.value: '品番',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.LOT_NO.value: 'ロットNo',
+ DataGroupType.TRAY_NO.value: 'トレイNo',
DataGroupType.DATA_SERIAL.value: 'シリアルNo',
DataGroupType.DATA_TIME.value: '計測日時',
DataGroupType.QUALITY_ID.value: '計測項目ID',
@@ -1061,6 +1083,10 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_NAME.value: '子設備',
DataGroupType.PART_NO.value: '品番',
DataGroupType.DATA_SERIAL.value: 'シリアルNo',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.QUALITY.value: '良否',
+ DataGroupType.LOT_NO.value: 'ロットNo',
+ DataGroupType.TRAY_NO.value: 'トレイNo',
DataGroupType.DATA_TIME.value: '加工日時',
DataGroupType.QUALITY_NAME.value: '測定項目名',
DataGroupType.DATA_VALUE.value: '測定値',
@@ -1073,6 +1099,9 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_ID.value: '子設備ID',
DataGroupType.MACHINE_NAME.value: '子設備',
DataGroupType.PART_NO.value: '品番',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.LOT_NO.value: 'ロットNo',
+ DataGroupType.TRAY_NO.value: 'トレイNo',
DataGroupType.DATA_SERIAL.value: 'シリアルNo',
DataGroupType.DATA_TIME.value: '加工日時',
DataGroupType.SUB_PART_NO.value: '子部品品番',
@@ -1090,6 +1119,9 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備名': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ 'ロット番号': DataGroupType.LOT_NO.value,
+ 'トレイ番号': DataGroupType.TRAY_NO.value,
'シリアル番号': DataGroupType.DATA_SERIAL.value,
'計測日時': DataGroupType.DATA_TIME.value,
'計測項目ID': DataGroupType.QUALITY_ID.value,
@@ -1104,6 +1136,10 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ '良否結果': DataGroupType.QUALITY.value,
+ 'ロットNo': DataGroupType.LOT_NO.value,
+ 'トレイNo': DataGroupType.TRAY_NO.value,
'シリアルNo': DataGroupType.DATA_SERIAL.value,
'加工日時': DataGroupType.DATA_TIME.value,
'測定項目名': DataGroupType.QUALITY_NAME.value,
@@ -1117,6 +1153,9 @@ def get_all_reserved_groups(cls):
'子設備ID': DataGroupType.MACHINE_ID.value,
'子設備名': DataGroupType.MACHINE_NAME.value,
'品番': DataGroupType.PART_NO.value,
+ 'ワーク種別': DataGroupType.WORK_TYPE.value,
+ 'ロットNo': DataGroupType.LOT_NO.value,
+ 'トレイNo': DataGroupType.TRAY_NO.value,
'シリアルNo': DataGroupType.DATA_SERIAL.value,
'計測日時': DataGroupType.DATA_TIME.value,
'子部品品番': DataGroupType.SUB_PART_NO.value,
@@ -1125,6 +1164,7 @@ def get_all_reserved_groups(cls):
'子部品シリアルNo': DataGroupType.SUB_SERIAL.value,
},
}
+
ABNORMAL_REVERSED_WELL_KNOWN_COLUMNS = {
DBType.V2.name: {
DataGroupType.LINE_ID.value: 'ラインID',
@@ -1134,6 +1174,9 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_ID.value: '子設備ID',
DataGroupType.MACHINE_NAME.value: '子設備名',
DataGroupType.PART_NO.value: '品番',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.LOT_NO.value: 'ロット番号',
+ DataGroupType.TRAY_NO.value: 'トレイ番号',
DataGroupType.DATA_SERIAL.value: 'シリアル番号',
DataGroupType.DATA_TIME.value: '計測日時',
DataGroupType.QUALITY_ID.value: '計測項目ID',
@@ -1148,6 +1191,10 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_ID.value: '子設備ID',
DataGroupType.MACHINE_NAME.value: '子設備',
DataGroupType.PART_NO.value: '品番',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.QUALITY.value: '良否結果',
+ DataGroupType.LOT_NO.value: 'ロットNo',
+ DataGroupType.TRAY_NO.value: 'トレイNo',
DataGroupType.DATA_SERIAL.value: 'シリアルNo',
DataGroupType.DATA_TIME.value: '加工日時',
DataGroupType.QUALITY_NAME.value: '測定項目名',
@@ -1161,6 +1208,9 @@ def get_all_reserved_groups(cls):
DataGroupType.MACHINE_ID.value: '子設備ID',
DataGroupType.MACHINE_NAME.value: '子設備名',
DataGroupType.PART_NO.value: '品番',
+ DataGroupType.WORK_TYPE.value: 'ワーク種別',
+ DataGroupType.LOT_NO.value: 'ロットNo',
+ DataGroupType.TRAY_NO.value: 'トレイNo',
DataGroupType.DATA_SERIAL.value: 'シリアルNo',
DataGroupType.DATA_TIME.value: '計測日時',
DataGroupType.SUB_PART_NO.value: '子部品品番',
@@ -1175,6 +1225,48 @@ def get_all_reserved_groups(cls):
'子設備名': DataGroupType.MACHINE_NAME.value,
'計測日時': DataGroupType.DATA_TIME.value,
'シリアル番号': DataGroupType.DATA_SERIAL.value,
+ 'ロット番号': DataGroupType.LOT_NO.value,
+ 'トレイ番号': DataGroupType.TRAY_NO.value,
+}
+
+# well-known English column names used in V2 files
+WELL_KNOWN_EN_COLUMNS = {
+ DBType.V2_MULTI.name: {
+ 'line_id': DataGroupType.LINE_ID.value,
+ 'line': DataGroupType.LINE_NAME.value,
+ 'process_id': DataGroupType.PROCESS_ID.value,
+ 'process': DataGroupType.PROCESS_NAME.value,
+ 'equipment_id': DataGroupType.MACHINE_ID.value,
+ 'equipment': DataGroupType.MACHINE_NAME.value,
+ 'part_number': DataGroupType.PART_NO.value,
+ 'work_type': DataGroupType.WORK_TYPE.value,
+ 'quality': DataGroupType.QUALITY.value,
+ 'lot_no': DataGroupType.LOT_NO.value,
+ 'tray_no': DataGroupType.TRAY_NO.value,
+ 'serial_no': DataGroupType.DATA_SERIAL.value,
+ 'processed_date_time': DataGroupType.DATA_TIME.value,
+ 'measurement_item_name': DataGroupType.QUALITY_NAME.value,
+ 'measured_value': DataGroupType.DATA_VALUE.value,
+ },
+}
+REVERSED_WELL_KNOWN_EN_COLUMNS = {
+ DBType.V2_MULTI.name: {
+ DataGroupType.LINE_ID.value: 'line_id',
+ DataGroupType.LINE_NAME.value: 'line',
+ DataGroupType.PROCESS_ID.value: 'process_id',
+ DataGroupType.PROCESS_NAME.value: 'process',
+ DataGroupType.MACHINE_ID.value: 'equipment_id',
+ DataGroupType.MACHINE_NAME.value: 'equipment',
+ DataGroupType.PART_NO.value: 'part_number',
+ DataGroupType.WORK_TYPE.value: 'work_type',
+ DataGroupType.QUALITY.value: 'quality',
+ DataGroupType.LOT_NO.value: 'lot_no',
+ DataGroupType.TRAY_NO.value: 'tray_no',
+ DataGroupType.DATA_SERIAL.value: 'serial_no',
+ DataGroupType.DATA_TIME.value: 'processed_date_time',
+ DataGroupType.QUALITY_NAME.value: 'measurement_item_name',
+ DataGroupType.DATA_VALUE.value: 'measured_value',
+ },
}
SUB_PART_NO_DEFAULT_SUFFIX = '.'
diff --git a/ap/common/services/csv_content.py b/ap/common/services/csv_content.py
index 8a894b7..d902276 100644
--- a/ap/common/services/csv_content.py
+++ b/ap/common/services/csv_content.py
@@ -203,6 +203,17 @@ def check_data_type(data):
except (ValueError, TypeError):
pass
+    # detect datetime values: try ISO format first, then fall back to
+    # generic detection for non-ISO formats, e.g. 20-09-2023 01:00
+ try:
+ is_datetime = datetime.fromisoformat(data)
+ if is_datetime:
+ return DataType.DATETIME
+ except (ValueError, TypeError):
+ valid_dt = detect_datetime(data)
+ if valid_dt:
+ return DataType.DATETIME
+
return predict_eu_type(data)
@@ -360,3 +371,12 @@ def zip_file_to_response(csv_data, file_names, export_type='csv'):
response.charset = encoding
return response
+
+
+def detect_datetime(datetime_value):
+ try:
+ # try to read value as datetime
+ pd.to_datetime([datetime_value])
+ return True
+ except (ValueError, TypeError):
+ return False
diff --git a/ap/common/services/csv_header_wrapr.py b/ap/common/services/csv_header_wrapr.py
index fd8391d..2d49290 100644
--- a/ap/common/services/csv_header_wrapr.py
+++ b/ap/common/services/csv_header_wrapr.py
@@ -540,7 +540,7 @@ def summarize_header_as_df(hdr: dict, info: dict):
# translate
head['main'] = _translate_wellknown_jp2en(head['main'])
# if head$main has some same value, add _01, _02, ...
- head['main'] = add_suffix_if_duplicated(head['main'])
+ head['main'], _ = add_suffix_if_duplicated(head['main'])
df_head = pd.DataFrame(head, index=head['main'])
return df_head
@@ -601,13 +601,11 @@ def _translate_wellknown_jp2en(x):
return x
-def add_suffix_if_duplicated(x, skip_zero=False, with_dupl_cols=False):
- is_dupl_cols = [False] * len(x)
- duplicated = [k for k, v in Counter(x).items() if v > 1]
+def add_suffix_if_duplicated(names, skip_zero=False):
+ is_dupl_cols = [False] * len(names)
+ duplicated = [k for k, v in Counter(names).items() if v > 1]
if len(duplicated) == 0:
- if with_dupl_cols:
- return x, is_dupl_cols
- return x
+ return names, is_dupl_cols
if not skip_zero:
# [a_01, a_02, a_03]
@@ -616,18 +614,35 @@ def add_suffix_if_duplicated(x, skip_zero=False, with_dupl_cols=False):
# [a, a_01, a_02]
suffix_format = (f'_{str(x - 1).zfill(2)!s}' if x > 1 else '' for x in range(1, 100))
dic_suffix = dict(zip(duplicated, tee(suffix_format, len(duplicated))))
- for idx, s in enumerate(x):
+ for idx, s in enumerate(names):
try:
suffix = str(next(dic_suffix[s]))
except KeyError:
continue
else:
- x[idx] += suffix
- is_dupl_cols[idx] = True
+ names[idx] += suffix
+ if suffix:
+ is_dupl_cols[idx] = True
+
+ return names, is_dupl_cols
+
+
+def transform_duplicated_col_suffix_to_pandas_col(dic_valid_csv_cols, dic_original_cols):
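+    # map our '_NN' duplicate suffixes back to the '.N' suffixes pandas assigns to duplicated headers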
+ col_names = []
+ for col_name, is_add_suffix in dic_valid_csv_cols.items():
+ org_col_name = col_name if not dic_original_cols else dic_original_cols[col_name]
+ if is_add_suffix:
+ # [a, a_01, a_02] -> [a, a.1, a.2]
+ matched = org_col_name.split('_')
+ if len(matched) > 1 and matched[-1].isdigit():
+ s = '_'.join(matched[0:-1])
+ col_names.append(f'{s}.{int(matched[-1])}')
+ else:
+ col_names.append(org_col_name)
+ else:
+ col_names.append(org_col_name)
- if with_dupl_cols:
- return x, is_dupl_cols
- return x
+ return col_names
# =========================
diff --git a/ap/setting_module/models.py b/ap/setting_module/models.py
index e94bc3c..7d946c3 100644
--- a/ap/setting_module/models.py
+++ b/ap/setting_module/models.py
@@ -591,11 +591,11 @@ def get_all_columns(cls, proc_id):
@classmethod
def get_columns_by_process_id(cls, proc_id):
- return (
- cls.query.filter(cls.process_id == proc_id)
- .with_entities(cls.id, cls.name, cls.data_type)
- .all()
- )
+ columns = cls.query.filter(cls.process_id == proc_id).all()
+ return [
+ {cls.id.name: col.id, 'name': col.shown_name, cls.data_type.name: col.data_type}
+ for col in columns
+ ]
class CfgProcess(db.Model):
@@ -807,7 +807,8 @@ def update_order(cls, meta_session, process_id, order):
@classmethod
def get_list_of_process(cls):
- return cls.query.with_entities(cls.id, cls.name).all()
+ processes = cls.query.order_by(cls.id).all()
+ return [{cls.id.name: proc.id, cls.name.name: proc.shown_name} for proc in processes]
class CfgTraceKey(db.Model):
diff --git a/ap/setting_module/schemas.py b/ap/setting_module/schemas.py
index 61e14e0..19c285a 100644
--- a/ap/setting_module/schemas.py
+++ b/ap/setting_module/schemas.py
@@ -126,6 +126,7 @@ class Meta:
name_local = fields.String(required=False, allow_none=True)
name_en = fields.String(required=False, allow_none=False)
shown_name = fields.String(required=False, allow_none=True)
+ name = fields.String(required=False, allow_none=True)
@post_load
def make_obj(self, data, **kwargs):
diff --git a/ap/setting_module/services/process_config.py b/ap/setting_module/services/process_config.py
index 5bb8819..e18a081 100644
--- a/ap/setting_module/services/process_config.py
+++ b/ap/setting_module/services/process_config.py
@@ -95,7 +95,8 @@ def create_or_update_process_cfg(proc_data, unused_columns):
proc_column.process_id = process.id
# transform english name
- proc_column.name_en = to_romaji(proc_column.column_name)
+ if not proc_column.name_en:
+ proc_column.name_en = to_romaji(proc_column.column_name)
sensor = Sensor.get_sensor_by_col_name(process.id, proc_column.column_name)
diff --git a/ap/static/aggregate_plot/js/aggregate_plot.js b/ap/static/aggregate_plot/js/aggregate_plot.js
index 1c564d0..8ea217f 100644
--- a/ap/static/aggregate_plot/js/aggregate_plot.js
+++ b/ap/static/aggregate_plot/js/aggregate_plot.js
@@ -277,7 +277,10 @@ const collectInputAsFormData = (clearOnFlyFilter, autoUpdate = false) => {
formData.set(CYCLIC_TERM.DIV_OFFSET, offsetH.toString());
}
- formData.set('divDates', JSON.stringify(divFromTo));
+ // convert divFromTo from local to UTC
+ const divDates = divFromTo.map(date => toUTCDateTime(date, null, true));
+
+ formData.set('divDates', JSON.stringify(divDates));
formData.set('divFormats', JSON.stringify(divFormats))
}
diff --git a/ap/static/common/js/utils.js b/ap/static/common/js/utils.js
index 978580e..caedf0b 100644
--- a/ap/static/common/js/utils.js
+++ b/ap/static/common/js/utils.js
@@ -1374,11 +1374,15 @@ const syncTraceDateTimeRange = (parentId = '', dtNames = {}, dtValues = {}) => {
}
};
-const toUTCDateTime = (localDate, localTime) => {
- if (isEmpty(localDate) || isEmpty(localTime)) return {date: localDate, time: localTime};
+const toUTCDateTime = (localDate, localTime, withDateTime = false) => {
+ if (!withDateTime && (isEmpty(localDate) || isEmpty(localTime))) return {date: localDate, time: localTime};
- const utcDT = moment.utc(moment(`${localDate} ${localTime}`, `${DATE_FORMAT} ${TIME_FORMAT}`));
+ const datetime = withDateTime ? localDate : `${localDate} ${localTime}`;
+ const utcDT = moment.utc(moment(datetime, `${DATE_FORMAT} ${TIME_FORMAT}`));
if (utcDT.isValid()) {
+ if (withDateTime) {
+ return utcDT.format(DATE_TIME_FMT);
+ }
return {
date: utcDT.format(DATE_FORMAT),
time: utcDT.format(TIME_FORMAT),
diff --git a/ap/static/setting_module/js/db_config.js b/ap/static/setting_module/js/db_config.js
index 9ac97e6..f1c4feb 100644
--- a/ap/static/setting_module/js/db_config.js
+++ b/ap/static/setting_module/js/db_config.js
@@ -6,6 +6,7 @@ let v2DataSources = null;
let MAX_NUMBER_OF_SENSOR = 100000000;
let isV2ProcessConfigOpening = false;
let v2ImportInterval = null;
+const DUMMY_V2_PROCESS_NAME = 'DUMMY_V2_PROCESS_NAME';
// data type
const originalTypes = {
0: null,
@@ -319,10 +320,16 @@ const showResources = async () => {
} else {
showLatestRecordsFromDS(res, true, true, !!res.v2_processes);
}
+
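+        // V2 files without any process name: register a dummy process shown under the data source name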
+ if (isV2 && res.is_process_null) {
+ res.v2_processes = [DUMMY_V2_PROCESS_NAME];
+ res.v2_processes_shown_name = [$(dbConfigElements.csvDBSourceName).val()];
+ }
// update process of V2
if (res.v2_processes && res.v2_processes.length) {
const v2ProcessList = res.v2_processes;
- addProcessList(v2ProcessList, v2ProcessList);
+ const v2ProcessShownNameList = res.v2_processes_shown_name || res.v2_processes;
+ addProcessList(v2ProcessList, v2ProcessShownNameList);
$('input[name="v2Process"]').on('change', () => {
const selectedProcess = getCheckedV2Processes();
if (selectedProcess.length) {
@@ -701,7 +708,7 @@ const saveCSVDataSource = (isV2=false) => {
$(dbElements.saveDataSourceModal).modal('show');
v2DataSources = v2DatasourceByProcess;
- } else {
+ } else if (v2DatasourceByProcess.length == 1) {
saveV2DataSource(v2DatasourceByProcess);
}
} else {
@@ -1703,7 +1710,8 @@ const getV2ProcessData = (dictDataSrc) => {
if (v2SelectedProcess.length) {
v2SelectedProcess.forEach(processName => {
const subDatasourceByProcess = JSON.parse(JSON.stringify(dictDataSrc));
- subDatasourceByProcess.name = `${subDatasourceByProcess.name}_${processName}`;
+ const suffix = processName === DUMMY_V2_PROCESS_NAME ? '' : `_${processName}`;
+ subDatasourceByProcess.name = `${subDatasourceByProcess.name}${suffix}`;
subDatasourceByProcess.csv_detail.process_name = processName;
subDatasourceByProcess.csv_detail.auto_link = false;
v2Datasources.push(subDatasourceByProcess);
diff --git a/ap/static/setting_module/js/proc_config.js b/ap/static/setting_module/js/proc_config.js
index 5f4baca..ca3b2b7 100644
--- a/ap/static/setting_module/js/proc_config.js
+++ b/ap/static/setting_module/js/proc_config.js
@@ -14,8 +14,6 @@ const procElements = {
divProcConfig: '#accordionPC',
};
-const serialNo = ['シリアルNo', 'シリアル']
-
const i18n = {
statusDone: $('#i18nStatusDone').text(),
statusImporting: $('#i18nStatusImporting').text(),
@@ -138,7 +136,7 @@ const genColConfigHTML = (col, isAddNew = true) => {
// if v2 col_name is シリアルNo -> auto check
if (!isSerial && isAddNew) {
- isSerial = serialNo.includes(col.column_name) ? 'checked' : '';
+ isSerial = /^.*シリアル|serial.*$/.test(col.column_name.toString().toLowerCase()) ? 'checked' : '';
}
return `