From e41059e58c07d0fb5279b09a4d436ddf7cb5db61 Mon Sep 17 00:00:00 2001
From: OMGToFo <93513240+OMGToFo@users.noreply.github.com>
Date: Sat, 30 Dec 2023 07:36:01 +0100
Subject: [PATCH] 2023.12.30.08 u.a. mit combined datasets

---
 app.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 80 insertions(+), 17 deletions(-)

diff --git a/app.py b/app.py
index 8abe132..daabc6d 100644
--- a/app.py
+++ b/app.py
@@ -31,12 +31,12 @@
 col_names_labels_df = pd.DataFrame()
 
 # File upload widget
-file = st.file_uploader("Upload SPSS file", type=[".sav"])
+file = st.sidebar.file_uploader("Upload SPSS file", type=[".sav"])
 
 
 
-st.write("")
-latinEncoding= st.checkbox("Deactivate latin-1 decoding if you get an error message. For me latin-1 has worked better, therefore it's set as default", value=True)
+st.sidebar.write("")
+latinEncoding= st.sidebar.checkbox("Deactivate latin-1 decoding if you get an error message. For me latin-1 has worked better, therefore it's set as default", value=True)
 st.write("")
 
 #if file is None:
@@ -65,13 +65,13 @@
 
 
 
-        st.write("")
-        st.write("")
-        rename_columns = st.checkbox("Rename column names with labels \n (Attention - as of now there have to be Variable Labels in SPSS to all Variables!")
-        st.write("")
 
-        st.write("")
-        dropEmptyColumns= st.checkbox("Drop all columns that only contain Nan or None Values - helps if renaming does not work")
+        st.sidebar.write("")
+        rename_columns = st.sidebar.checkbox("Rename column names with labels \n (Attention - as of now there have to be Variable Labels in SPSS to all Variables!")
+
+
+        st.sidebar.write("")
+        dropEmptyColumns= st.sidebar.checkbox("Drop all columns that only contain Nan or None Values - helps if renaming does not work")
         if dropEmptyColumns:
             labelledData = labelledData.dropna(axis=1, how='all')
             rawData = rawData.dropna(axis=1, how='all')
@@ -93,7 +93,7 @@
 
         rawDataExpander = st.expander("Show & save Raw Data?")  ############################
         with rawDataExpander:
-            st.write("## Raw Data")
+            st.write("## Raw Data without labels")
 
 
             if rename_columns:
@@ -135,6 +135,12 @@
 
             st.write(rawData)
 
+            #st.info("columns with numeric data types")
+            #numeric_df = rawData.select_dtypes(include='number')
+            #st.write(numeric_df)
+
+
+
             def to_excel(rawData):
                 output = BytesIO()
                 writer = pd.ExcelWriter(output, engine='xlsxwriter')
@@ -419,7 +425,7 @@ def to_excel(labelledData):
         if len(rawData)>1 and len(labelledData)>1:
 
 
-            MergedDataExpander = st.expander("Create a new dataset with chosen variables?") ############################
+            MergedDataExpander = st.expander("Combined datasets") ############################
             with MergedDataExpander:
 
                 # Load the data frames
@@ -434,7 +440,67 @@ def to_excel(labelledData):
                 df1_prefixed = df1.add_prefix('numeric_')
                 df2_prefixed = df2.add_prefix('categorical_')
 
-                # Let the user select columns to merge
+                #st.write(df1_prefixed)
+                #st.write(df2_prefixed)
+                st.subheader("Combined Dataset containing all categorical and numerical Variables")
+                #mergeAll = st.button("Merge all columns with categorical and numerical data?")
+                mergeAll = True
+                mergeAll_df = pd.DataFrame()
+                if mergeAll:
+
+                    _="""
+                    mergeAll_df = df2_prefixed.merge(
+                        df1_prefixed,
+                        left_on='categorical_Participant',
+                        right_on='numeric_Participant',
+                        # You can choose 'inner', 'outer', 'left', or 'right' depending on your requirements
+                    )
+                    """
+
+                    mergeAll_df = pd.merge(df2_prefixed, df1_prefixed, left_index=True, right_index=True)
+
+
+
+                # Display the merged data frame
+                if len(mergeAll_df)>0:
+                    #st.write("## Combined dataset with all variables")
+
+                    st.write(mergeAll_df)
+                    #st.write(mergeAll_df.describe())
+
+
+
+
+                if len(mergeAll_df)>0:
+
+                    def to_excel(mergeAll_df):
+                        """Serialize the combined frame to .xlsx bytes (index kept, Sheet1)."""
+                        output = BytesIO()
+                        # Context manager closes the writer even if a write or format call fails.
+                        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
+                            mergeAll_df.to_excel(writer, index=True, sheet_name='Sheet1')
+                            workbook = writer.book
+                            worksheet = writer.sheets['Sheet1']
+                            format1 = workbook.add_format({'num_format': '0.00'})
+                            worksheet.set_column('A:A', None, format1)
+                        return output.getvalue()
+
+
+                    df_xlsx = to_excel(mergeAll_df)
+                    st.download_button(label='📥 Export combined dataset to Excel?',
+                                       data=df_xlsx,
+                                       file_name='SPSSallCombinedColumnsToExcel.xlsx')
+
+
+
+
+
+
+
+                # Let the user select columns to merge #####################
+                st.divider()
+                st.subheader("")
+                st.subheader("Create a new dataset of chosen Variables")
                 selected_categorical_cols = st.multiselect('Select catgorical variables (men/women, old/young..):', categorical_cols)
                 st.write("")
                 st.write("")
@@ -445,8 +511,6 @@ def to_excel(labelledData):
                 # Merge the selected columns
                 merged_df = pd.DataFrame()
 
-                # Merge numeric columns
-
                 # Merge categorical columns
                 for col in selected_categorical_cols:
                     merged_df[col] = df2_prefixed['categorical_' + col]
@@ -519,9 +583,8 @@ def to_excel(labelledData):
 
                 # Display the merged data frame
                 if len(merged_df)>0:
-                    st.write("## Combined dataset")
-                    if rename_columns:
-                        st.info("Datafile with renamed columns")
+                    st.write()
+