From e41059e58c07d0fb5279b09a4d436ddf7cb5db61 Mon Sep 17 00:00:00 2001 From: OMGToFo <93513240+OMGToFo@users.noreply.github.com> Date: Sat, 30 Dec 2023 07:36:01 +0100 Subject: [PATCH] 2023.12.30.08 u.a. mit combined datasets --- app.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index 8abe132..daabc6d 100644 --- a/app.py +++ b/app.py @@ -31,12 +31,12 @@ col_names_labels_df = pd.DataFrame() # File upload widget -file = st.file_uploader("Upload SPSS file", type=[".sav"]) +file = st.sidebar.file_uploader("Upload SPSS file", type=[".sav"]) -st.write("") -latinEncoding= st.checkbox("Deactivate latin-1 decoding if you get an error message. For me latin-1 has worked better, therefore it's set as default", value=True) +st.sidebar.write("") +latinEncoding= st.sidebar.checkbox("Deactivate latin-1 decoding if you get an error message. For me latin-1 has worked better, therefore it's set as default", value=True) st.write("") #if file is None: @@ -65,13 +65,13 @@ - st.write("") - st.write("") - rename_columns = st.checkbox("Rename column names with labels \n (Attention - as of now there have to be Variable Labels in SPSS to all Variables!") - st.write("") - st.write("") - dropEmptyColumns= st.checkbox("Drop all columns that only contain Nan or None Values - helps if renaming does not work") + st.sidebar.write("") + rename_columns = st.sidebar.checkbox("Rename column names with labels \n (Attention - as of now there have to be Variable Labels in SPSS to all Variables!") + + + st.sidebar.write("") + dropEmptyColumns= st.sidebar.checkbox("Drop all columns that only contain Nan or None Values - helps if renaming does not work") if dropEmptyColumns: labelledData = labelledData.dropna(axis=1, how='all') rawData = rawData.dropna(axis=1, how='all') @@ -93,7 +93,7 @@ rawDataExpander = st.expander("Show & save Raw Data?") ############################ with rawDataExpander: - st.write("## Raw Data") + st.write("## Raw Data without labels") if rename_columns: @@ -135,6 +135,12 @@ st.write(rawData) + #st.info("columns with numeric data types") + #numeric_df = rawData.select_dtypes(include='number') + #st.write(numeric_df) + + + def to_excel(rawData): output = BytesIO() writer = pd.ExcelWriter(output, engine='xlsxwriter') @@ -419,7 +425,7 @@ def to_excel(labelledData): if len(rawData)>1 and len(labelledData)>1: - MergedDataExpander = st.expander("Create a new dataset with chosen variables?") ############################ + MergedDataExpander = st.expander("Ccombined datasets") ############################ with MergedDataExpander: # Load the data frames @@ -434,7 +440,67 @@ def to_excel(labelledData): df1_prefixed = df1.add_prefix('numeric_') df2_prefixed = df2.add_prefix('categorical_') - # Let the user select columns to merge + #st.write(df1_prefixed) + #st.write(df2_prefixed) + st.subheader("Combinded Dataset containing all categorical and numerical Variables") + #mergeAll = st.button("Merge all columns with categorical and numerical data?") + mergeAll = True + mergeAll_df = pd.DataFrame() + if mergeAll: + + _=""" + mergeAll_df = df2_prefixed.merge( + df1_prefixed, + left_on='categorical_Participant', + right_on='numeric_Participant', + # You can choose 'inner', 'outer', 'left', or 'right' depending on your requirements + ) + """ + + mergeAll_df = pd.merge(df2_prefixed, df1_prefixed, left_index=True, right_index=True) + + + + # Display the merged data frame + if len(mergeAll_df)>0: + #st.write("## Combined dataset with all variables") + + st.write(mergeAll_df) + #st.write(mergeAll_df.describe()) + + + + + if len(mergeAll_df)>0: + + def to_excel(mergeAll_df): + output = BytesIO() + writer = pd.ExcelWriter(output, engine='xlsxwriter') + mergeAll_df.to_excel(writer, index=True, sheet_name='Sheet1') + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + format1 = workbook.add_format({'num_format': '0.00'}) + worksheet.set_column('A:A', None, format1) + writer.close() + processed_data = output.getvalue() + return processed_data + + + df_xlsx = to_excel(mergeAll_df) + st.download_button(label='📥 Export combined dataset to Excel?', + data=df_xlsx, + file_name='SPSSallCombinedColumnsToExcel.xlsx') + + + + + + + + # Let the user select columns to merge ##################### + st.divider() + st.subheader("") + st.subheader("Create a new dataset of chosen Variables") selected_categorical_cols = st.multiselect('Select catgorical variables (men/women, old/young..):', categorical_cols) st.write("") st.write("") @@ -445,8 +511,6 @@ def to_excel(labelledData): # Merge the selected columns merged_df = pd.DataFrame() - # Merge numeric columns - # Merge categorical columns for col in selected_categorical_cols: merged_df[col] = df2_prefixed['categorical_' + col] @@ -519,9 +583,8 @@ def to_excel(labelledData): # Display the merged data frame if len(merged_df)>0: - st.write("## Combined dataset") - if rename_columns: - st.info("Datafile with renamed columns") + st.write() +