diff --git a/WHAT-WE-LEARNED.md b/WHAT-WE-LEARNED.md index 72a09ca..72ab7ba 100644 --- a/WHAT-WE-LEARNED.md +++ b/WHAT-WE-LEARNED.md @@ -21,19 +21,19 @@ Typing might help here. I've also wondered about naming conventions, but I haven It seems like the returned value would be the same, so I would like to compress something like this: ```python @reactive.calc -def csv_fields_calc(): - return read_field_names(req(csv_path())) +def csv_labels_calc(): + return read_labels(req(csv_path())) @render.text -def csv_fields(): - return csv_fields_calc() +def csv_labels(): + return csv_labels_calc() ``` into: ```python @reactive.calc @render.text -def csv_fields(): - return read_field_names(req(csv_path())) +def csv_labels(): + return read_labels(req(csv_path())) ``` but that returns an error: ``` @@ -44,6 +44,20 @@ Renderer.__call__() missing 1 required positional argument: '_fn' It feels like a gap in the library that there is no component testing. The only advice is to pull out testable logic from the server functions, and for the rest, use end-to-end tests: There's not a recommended way to test the ui+server interaction for just one component. +## Unstated requirements for module IDs + +The [docs](https://shiny.posit.co/py/docs/modules.html#how-to-use-modules) only say: + +> This id has two requirements. First, it must be unique in a single scope, and can’t be duplicated in a given application or module definition. ... Second, the UI and server ids must match. + +But it's fussier than that: + +``` +ValueError: The string 'Will this work?' is not a valid id; only letters, numbers, and underscore are permitted +``` + +Was planning to just use the CSV column headers as IDs, but that's not going to work. Could Shiny just hash whatever we provide, so we wouldn't have to worry about this? + ## Normal tooling doesn't work inside of app? There are several bits of tooling that don't seem to work inside end-to-end app tests. My guess is that this isn't related to Shiny per se, but rather the ASGI framework: It's not running in the same process as pytest, so it's not surprising that the pytest process can't instrument this. diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py index 2a256da..8f50eca 100644 --- a/dp_creator_ii/app/analysis_panel.py +++ b/dp_creator_ii/app/analysis_panel.py @@ -4,7 +4,7 @@ from dp_creator_ii.app.components.inputs import log_slider from dp_creator_ii.app.components.column_module import column_ui, column_server -from dp_creator_ii.utils.csv_helper import read_field_names +from dp_creator_ii.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names from dp_creator_ii.app.components.outputs import output_code_sample, demo_tooltip from dp_creator_ii.utils.templates import make_privacy_loss_block @@ -68,7 +68,7 @@ def _update_checkbox_group(): ui.update_checkbox_group( "columns_checkbox_group", label=None, - choices=csv_fields_calc(), + choices=csv_ids_labels_calc(), ) @reactive.effect @@ -91,10 +91,12 @@ def columns_checkbox_group_tooltip_ui(): @render.ui def columns_ui(): column_ids = input.columns_checkbox_group() + column_ids_to_names = csv_ids_names_calc() + column_ids_to_labels = csv_ids_labels_calc() for column_id in column_ids: column_server( column_id, - name=column_id, + name=column_ids_to_names[column_id], contributions=contributions(), epsilon=epsilon_calc(), set_column_weight=set_column_weight, @@ -103,19 +105,19 @@ def columns_ui(): ) return [ [ - ui.h3(column_id), + ui.h3(column_ids_to_labels[column_id]), column_ui(column_id), ] for column_id in column_ids ] @reactive.calc - def csv_fields_calc(): - return read_field_names(req(csv_path())) + def csv_ids_names_calc(): + return read_csv_ids_names(req(csv_path())) - @render.text - def csv_fields(): - return csv_fields_calc() + @reactive.calc + def csv_ids_labels_calc(): + return read_csv_ids_labels(req(csv_path())) @render.ui def epsilon_tooltip_ui(): diff --git a/dp_creator_ii/utils/csv_helper.py b/dp_creator_ii/utils/csv_helper.py index 147551b..4c279ee 100644 --- a/dp_creator_ii/utils/csv_helper.py +++ b/dp_creator_ii/utils/csv_helper.py @@ -1,7 +1,14 @@ +""" +We'll use the following terms consistently throughout the application: +- name: This is the exact column header in the CSV. +- label: This is the string we'll display. +- id: This is the string we'll pass as a module ID. +""" + import polars as pl -def read_field_names(csv_path): +def read_csv_names(csv_path): # Polars is overkill, but it is more robust against # variations in encoding than Python stdlib csv. # However, it could be slow: @@ -10,3 +17,20 @@ def read_field_names(csv_path): # > resolving its schema, which is a potentially expensive operation. lf = pl.scan_csv(csv_path) return lf.collect_schema().names() + + +def read_csv_ids_labels(csv_path): + return { + name_to_id(name): f"{i+1}: {name or '[blank]'}" + for i, name in enumerate(read_csv_names(csv_path)) + } + + +def read_csv_ids_names(csv_path): + return {name_to_id(name): name for name in read_csv_names(csv_path)} + + +def name_to_id(name): + # Shiny is fussy about module IDs, + # but we don't need them to be human readable. + return str(hash(name)).replace("-", "_") diff --git a/tests/fixtures/fake.csv b/tests/fixtures/fake.csv index baabea7..e11908a 100644 --- a/tests/fixtures/fake.csv +++ b/tests/fixtures/fake.csv @@ -1,7 +1,7 @@ -student_id,class_year,hw_number,grade -1234,1,1,90 -1234,1,2,95 -1234,1,3,85 -6789,2,1,70 -6789,2,2,100 -6789,2,3,90 +,class year,class year,hw number,hw-number,grade +1234,1,1,1,4,90 +1234,1,1,2,5,95 +1234,1,1,3,6,85 +6789,2,2,1,4,70 +6789,2,2,2,5,100 +6789,2,2,3,6,90 diff --git a/tests/test_app.py b/tests/test_app.py index 624b276..638fb90 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -62,10 +62,12 @@ def expect_no_error(): expect_visible(perform_analysis_text) expect_not_visible(download_results_text) # Columns: - expect_visible("student_id") - expect_visible("class_year") - expect_visible("hw_number") - expect_visible("grade") + expect_visible("1: [blank]") # Empty column! + expect_visible("2: class year") + expect_visible("3: class year_duplicated_0") # Duplicated column! + expect_visible("4: hw number") + expect_visible("5: hw-number") # Distinguished by non-word character! + expect_visible("6: grade") # Epsilon slider: # (Note: Slider tests failed on CI when run after column details, # although it worked locally. This works in either environment. diff --git a/tests/utils/test_csv_helper.py b/tests/utils/test_csv_helper.py index c35c39a..de6f219 100644 --- a/tests/utils/test_csv_helper.py +++ b/tests/utils/test_csv_helper.py @@ -4,7 +4,7 @@ import tempfile import pytest -from dp_creator_ii.utils.csv_helper import read_field_names +from dp_creator_ii.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names # We will not reference the encoding when reading: @@ -48,6 +48,12 @@ def test_csv_loading(write_encoding): else: pl_testing.assert_frame_equal(write_lf, read_lf) - # Preceding lines are reading the whole DF via Polars. - field_names_read = read_field_names(fp.name) - assert field_names_read == list(data.keys()) + # Preceding lines are reading the whole DF via Polars: + # Now test how we read just the headers. + # Keys are hashes, and won't be stable across platforms, + # so let's just look at the values. + ids_labels = read_csv_ids_labels(fp.name) + assert set(ids_labels.values()) == {"2: AGE", "1: NAME"} + + ids_names = read_csv_ids_names(fp.name) + assert set(ids_names.values()) == {"AGE", "NAME"}