Skip to content

Commit

Permalink
2023.06.21.12 mit Pygwalker, crosstables, editable df
Browse files Browse the repository at this point in the history
  • Loading branch information
OMGToFo authored Jun 21, 2023
1 parent abfa914 commit 48ddb10
Showing 1 changed file with 183 additions and 52 deletions.
235 changes: 183 additions & 52 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from streamlit_pandas_profiling import st_profile_report
from pandas_profiling import ProfileReport

#2023.06.18 pygwalker visualization library
import pygwalker as pyg

#These are the visualization libraries. Matplotlib is standard and is what most people use.
#Seaborn works on top of matplotlib, as we mentioned in the course.
Expand Down Expand Up @@ -54,7 +56,7 @@
st.write("")
st.write("")
st.write("")
rename_columns = st.checkbox("Rename column names with labels \n (attention - as of now there have to be Variable Labels to all Variables)")
# Opt-in switch: rename the columns using their variable labels.
# The label's parenthetical note is now closed properly.
rename_columns = st.checkbox("Rename column names with labels \n (Attention - as of now there have to be Variable Labels in SPSS to all Variables!)")
st.write("")
st.write("")
st.write("")
Expand Down Expand Up @@ -366,7 +368,7 @@ def to_excel(labelledData):
if len(rawData)>1 and len(labelledData)>1:


MergedDataExpander = st.expander("Merge categorical and numerical variables to a new dataset?") ############################
MergedDataExpander = st.expander("Create a new dataset with chosen variables?") ############################
with MergedDataExpander:

# Load the data frames
Expand All @@ -383,9 +385,9 @@ def to_excel(labelledData):

# Let the user select columns to merge
st.write("")
selected_numeric_cols = st.multiselect('Select columns with numeric values (brand awareness 0-100, sympathy 1-7,...) :', numeric_cols)
selected_numeric_cols = st.multiselect('Select numeric variables (brand awareness 0-100, sympathy 1-7,...) :', numeric_cols)
st.write("")
selected_categorical_cols = st.multiselect('Select columns with categories (men/women, old/young..):', categorical_cols)
# Let the user pick the categorical columns to merge ("catgorical" typo in the label fixed).
selected_categorical_cols = st.multiselect('Select categorical variables (men/women, old/young..):', categorical_cols)
st.write("")
# Merge the selected columns
merged_df = pd.DataFrame()
Expand All @@ -402,42 +404,139 @@ def to_excel(labelledData):
merged_df[col] = df1_prefixed['numeric_' + col]


st.write("## Combined dataset")
if rename_columns:
st.info("Datafile with renamed columns")
st.write("")
st.write("")

# Recode Variables? #######################

# Recode selected variables in place: the user types a replacement for every
# distinct value of each chosen column and then confirms to overwrite the
# original columns of merged_df.
if st.checkbox("Select Variables to recode while changing the original variables?"):
    selected_RecodeColumns = st.multiselect('Select variables to recode', merged_df.columns)
    if selected_RecodeColumns:
        # column name -> {old value: new value}
        recode_data = {}
        for column in selected_RecodeColumns:
            st.info(f"Recode {column}")
            recode_map = {}
            for value in merged_df[column].unique():
                # The widget key includes the column name. Keyed on the value
                # alone, two selected columns sharing a value (e.g. both
                # contain "yes") would create widgets with identical keys,
                # which Streamlit rejects.
                new_value = st.text_input(
                    f"Insert new value for {value}",
                    value,
                    key=f"new_valueRecode_{column}_{value}",
                )
                recode_map[value] = new_value
            recode_data[column] = recode_map

        recode_button = st.checkbox(":point_right: :orange[Now recode these values and replace the original variables!]", key="recode_button")
        if recode_button:
            # Overwrite each selected column with its recoded values.
            for column, recode_map in recode_data.items():
                merged_df[column] = merged_df[column].map(recode_map)



st.write("")
st.write("")

# Recode selected variables into NEW columns: the user types a replacement for
# every distinct value, then confirms; the result is stored next to the
# original column under "<column>_recoded".
if st.checkbox("Select Variables to recode into new Variables?"):
    selected_RecodeToNewColumns = st.multiselect('Select the variables to recode into new Variables', merged_df.columns, key="selected_RecodeToNewColumns")
    if selected_RecodeToNewColumns:
        # column name -> {old value: new value}
        recode_data = {}
        for column in selected_RecodeToNewColumns:
            st.info(f"Recode {column}")
            recode_map = {}
            for value in merged_df[column].unique():
                # The widget key includes the column name. Keyed on the value
                # alone, two selected columns sharing a value would create
                # widgets with identical keys, which Streamlit rejects.
                new_value = st.text_input(
                    f"New value for {value}",
                    value,
                    key=f"new_valueRecodeToNew_{column}_{value}",
                )
                recode_map[value] = new_value
            recode_data[column] = recode_map

        recodetoNewVariable_button = st.checkbox(":point_right: :orange[Now Recode these values into new variables!]", key="recodetoNewVariable_button")
        if recodetoNewVariable_button:
            # Keep the original column and add the recoded copy beside it.
            for column, recode_map in recode_data.items():
                merged_df[column + "_recoded"] = merged_df[column].map(recode_map)






st.write("")
st.write("")

# Display the merged data frame
st.write("Table with selected columns (merged_df):")
st.dataframe(merged_df)
if len(merged_df)>0:
st.write("## Combined dataset")
if rename_columns:
st.info("Datafile with renamed columns")
#st.subheader("Dataset with selected columns (merged_df):")
#st.dataframe(merged_df)

merged_df = st.experimental_data_editor(merged_df, num_rows="dynamic")

#st.write("merged_df.columns", merged_df.columns)


#st.write("Editable Table with selected columns (edited_df):")
#Test mit editierbares dataframe
#edited_df = st.experimental_data_editor(merged_df,use_container_width=True,num_rows="dynamic",)

st.write("")
st.write("")

def to_excel(merged_df):
    """Serialize *merged_df* to an in-memory Excel workbook.

    The frame is written, including its index, to a sheet named 'Sheet1'
    using the xlsxwriter engine; column A gets a two-decimal number format.

    Returns:
        bytes: the content of the generated .xlsx file.
    """
    output = BytesIO()
    # The context manager finalizes the workbook on exit and also closes the
    # writer if an exception is raised. ExcelWriter.save() no longer exists
    # in pandas 2.x.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        merged_df.to_excel(writer, index=True, sheet_name='Sheet1')
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        format1 = workbook.add_format({'num_format': '0.00'})
        worksheet.set_column('A:A', None, format1)
    return output.getvalue()


df_xlsx = to_excel(merged_df)
st.download_button(label='📥 Export Table with the selected columns to Excel?',
data=df_xlsx,
file_name='SPSSselectedColumnsToExcel.xlsx')


def to_excel(merged_df):
    """Serialize *merged_df* to an in-memory Excel workbook.

    The frame is written, including its index, to a sheet named 'Sheet1'
    using the xlsxwriter engine; column A gets a two-decimal number format.

    Returns:
        bytes: the content of the generated .xlsx file.
    """
    output = BytesIO()
    # The context manager finalizes the workbook on exit and also closes the
    # writer if an exception is raised. ExcelWriter.save() no longer exists
    # in pandas 2.x.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        merged_df.to_excel(writer, index=True, sheet_name='Sheet1')
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        format1 = workbook.add_format({'num_format': '0.00'})
        worksheet.set_column('A:A', None, format1)
    return output.getvalue()


df_xlsx = to_excel(merged_df)
st.download_button(label='📥 Export Table with the selected columns to Excel?',
data=df_xlsx,
file_name='SPSSselectedColumnsToExcel.xlsx')



st.write("")
st.write("")
if st.checkbox("Explore the dataset visually?"):
def load_config(file_path):
    """Return the full text of the file at *file_path* as a string.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding, so the same config file reads identically
    on every system.
    """
    with open(file_path, 'r', encoding='utf-8') as config_file:
        return config_file.read()


config = load_config('config.json')

pyg.walk(merged_df, env='Streamlit', dark='dark', spec=config)

st.write("")
st.write("")









#Tabellen mit Häufigkeiten und Prozenten #########################################

if st.checkbox("Show Frequencies and Percentages of Values for every chosen Variable"):
if st.checkbox("Show frequencies and percentages of values for every chosen variable"):

individualTables = st.checkbox("Show individual tables for every Variable")
individualTables = st.checkbox("Show individual tables for every variable")

prozente_anzahl_df = pd.DataFrame()
for column in merged_df.columns[0:]:
Expand All @@ -453,7 +552,7 @@ def to_excel(merged_df):
#st.write(anzahl_df)
prozente_df['Cases'] = anzahl_df.Anzahl

prozente_df = prozente_df.sort_values('Label')
#prozente_df = prozente_df.sort_values('Label') #gibt leider manchmal fehlermeldung wenn zahlen vorkommen..

prozente_anzahl_df = prozente_anzahl_df.append(prozente_df)

Expand All @@ -464,7 +563,7 @@ def to_excel(merged_df):

st.write("")
st.write("")
st.subheader("Table with all percentages and frequencies of the selected variables")
st.subheader("All column-percentages and frequencies of the selected variables in one Table:")
st.write(prozente_anzahl_df)


Expand Down Expand Up @@ -493,14 +592,6 @@ def to_excel(prozente_anzahl_df):











#dataframe mit den häufigkeiten der Kombinationen ####################
AlleKombinationenProzent = merged_df[selected_categorical_cols].value_counts(normalize=True).reset_index()
#AlleKombinationenProzent.columns.values[0] = "Label"
Expand All @@ -509,7 +600,7 @@ def to_excel(prozente_anzahl_df):
st.dataframe(AlleKombinationenProzent)



_="""
if st.checkbox("Show labelling/unique values?"):
anzahlVariablen = len(selected_numeric_cols) + len(selected_categorical_cols)
st.write("Anzahl Variablen: ",anzahlVariablen)
Expand All @@ -528,8 +619,9 @@ def to_excel(prozente_anzahl_df):
dict_of_merged_df= {} # initialize empty dictionary
st.info("None's are not deleted, might be a problem. I'll add functionality here if/when i figure out how")
#merged_df = merged_df.dropna(axis = 0, how ='any')
st.info("None's are not deleted automatically, can cause problems/error message. I'll add functionality here if/when i figure out how")
if st.checkbox("Delete Nones"):
merged_df = merged_df.dropna(axis = 0, how ='any')
for i in range(anzahlVariablen):
col = cols[i%anzahlVariablen]
Expand All @@ -553,15 +645,60 @@ def to_excel(prozente_anzahl_df):
dict_of_merged_df["merged_df_{}".format(i)] = merged_df[VariablenKolumnenAuswahl[i]].unique()
#col.write(dict_of_merged_df["merged_df_{}".format(i)])
"""


if st.checkbox("Show descriptive Infos?"):
st.write(merged_df.describe())
st.write("")
st.write("")



################### mehrfachantwortensets####################################
# Multi-response set: pool the answers of several variables and tabulate how
# often each value occurs across all of them.
if st.checkbox("Create Multiresponse-sets"):

    merged_df_KatVariablenMehrfach = labelledData
    categorical_cols_forCrossTable = merged_df_KatVariablenMehrfach.columns.tolist()

    st.write("")
    selected_categorical_cols_forMultiResponseSet = st.multiselect('Select variables for the multiresponse-set:',categorical_cols_forCrossTable, key="selected_categorical_cols_forMultiResponseSet")
    if len(selected_categorical_cols_forMultiResponseSet)>1:
        # Load the selected columns into their own DataFrame
        df_MultiresponseSet = pd.DataFrame(merged_df_KatVariablenMehrfach, columns=selected_categorical_cols_forMultiResponseSet)
        st.write("df_MultiresponseSet:", df_MultiresponseSet)

        # Count every non-missing answer across the selected columns.
        # NOTE: only NaN/None are skipped; empty strings still count as answers.
        counts = {}
        total_responses = 0
        for column in df_MultiresponseSet.columns:
            for brand in df_MultiresponseSet[column]:
                if pd.notna(brand):
                    counts[brand] = counts.get(brand, 0) + 1
                    total_responses += 1

        # Show the results as a table, most frequent value first
        MultiresponseSetresult_df = pd.DataFrame(list(counts.items()), columns=['Values', 'Anzahl'])
        MultiresponseSetresult_df = MultiresponseSetresult_df.sort_values('Anzahl', ascending=False)

        # Percentages: share of respondents (rows) and share of all answers.
        # The respondent base is the row count of the frame being tabulated,
        # not a separate `df` variable that is not defined in this section.
        # NOTE(review): assumes one row per respondent - confirm.
        respondent_count = len(df_MultiresponseSet)
        MultiresponseSetresult_df['% Befragte'] = (MultiresponseSetresult_df['Anzahl'] / respondent_count) * 100
        MultiresponseSetresult_df['% Antworten'] = (MultiresponseSetresult_df['Anzahl'] / total_responses) * 100

        st.subheader("Multiresponse-Set - Values and Percentages:")
        st.write(MultiresponseSetresult_df)



#st.write(merged_df.dtypes)

st.write("")
st.write("")

################### cross tabulations ####################################
if st.checkbox("Create cross-tabulations?"):
st.subheader("Cross Tables with Average Values - Beta")
Expand Down Expand Up @@ -663,7 +800,7 @@ def to_excel(ThomasFormatiertesDataframe):
st.write("")


#Tabellen mit Haeufigkeiten und Prozenten - Versuch mit Kreuztabellen#########################################
#Tabellen mit Haeufigkeiten und Prozenten - mit Kreuztabellen#########################################
#Kat Variablen werden zu Spalten #############
# Generate cross table with average values

Expand Down Expand Up @@ -745,21 +882,10 @@ def to_excel(ThomasFormatiertesDataframe):



################### cross tabulations - end ####################################













################### cross tabulations - end ####################################

st.write("")
st.write("")

Expand All @@ -783,6 +909,11 @@ def to_excel(ThomasFormatiertesDataframe):
plt.title('Correlation Heatmap', fontsize=8)
st.write(fig)



################### profile reporting ####################################


st.write("")
st.write("")

Expand Down

0 comments on commit 48ddb10

Please sign in to comment.