Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Couche fondation pour la consolidation IRVE statique (DataFrame) #4288

Merged
merged 25 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
495 changes: 495 additions & 0 deletions apps/shared/meta/schema-irve-statique.json

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions apps/transport/lib/irve/data_frame.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
defmodule Transport.IRVE.DataFrame do
  @moduledoc """
  Helpers to load the content of a static IRVE file into an `Explorer.DataFrame`.
  """

  @doc """
  Translates a TableSchema field type into the dtype passed to `Explorer.DataFrame`.

  Only the types listed below are remapped; any other value is returned unchanged.
  This is intentionally specific to the static IRVE use case, not a generic converter.

      iex> Transport.IRVE.DataFrame.remap_schema_type(:geopoint)
      :string
      iex> Transport.IRVE.DataFrame.remap_schema_type(:number)
      {:u, 16}
      iex> Transport.IRVE.DataFrame.remap_schema_type(:literally_anything)
      :literally_anything
  """
  def remap_schema_type(input_type)
  def remap_schema_type(:geopoint), do: :string
  def remap_schema_type(:number), do: {:u, 16}
  def remap_schema_type(passthrough), do: passthrough

  @doc """
  Loads an in-memory CSV binary as an `Explorer.DataFrame`, typing each column
  according to the `"fields"` entry of `schema`.

  `schema` defaults to the static IRVE schema returned by
  `Transport.IRVE.StaticIRVESchema.schema_content/0`.

  Raises if `schema` has no `"fields"` key, or if a field lacks `"name"` or `"type"`.
  """
  def dataframe_from_csv_body!(body, schema \\ Transport.IRVE.StaticIRVESchema.schema_content()) do
    fields = Map.fetch!(schema, "fields")
    column_dtypes = Enum.map(fields, &field_dtype/1)

    Explorer.DataFrame.load_csv!(body, dtypes: column_dtypes)
  end

  # Builds the `{column_name, dtype}` pair for a single schema field.
  defp field_dtype(%{"name" => name, "type" => type}) do
    dtype = type |> String.to_atom() |> remap_schema_type()
    {String.to_atom(name), dtype}
  end
end
19 changes: 19 additions & 0 deletions apps/transport/lib/irve/static_irve_schema.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
defmodule Transport.IRVE.StaticIRVESchema do
  @moduledoc """
  Gives code access to the static IRVE schema stored in this repository.
  """

  @doc """
  Reads the static IRVE schema JSON file and returns its decoded content.

  The file is located relative to this source file. Nothing is cached:
  every call reads and decodes the file again.

  Raises if the file cannot be read or does not contain valid JSON.
  """
  def schema_content do
    # Resolved relative to this source file; the leading `..` steps over the file name itself.
    schema_path = Path.expand("../../../../shared/meta/schema-irve-statique.json", __ENV__.file)

    schema_path
    |> File.read!()
    |> Jason.decode!()
  end
end
1 change: 1 addition & 0 deletions apps/transport/mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ defmodule Transport.Mixfile do
{:unzip, "~> 0.8"},
{:protobuf, "~> 0.11"},
{:nimble_csv, "~> 1.2.0"},
{:explorer, "~> 0.10.0"},
{:kino, "~> 0.6", only: :dev},
# db
{:ecto, "~> 3.7"},
Expand Down
53 changes: 53 additions & 0 deletions apps/transport/test/support/factory.ex
Original file line number Diff line number Diff line change
Expand Up @@ -449,4 +449,57 @@ defmodule DB.Factory do
"schema" => %{"name" => Keyword.get(opts, :schema_name), "version" => Keyword.get(opts, :schema_version)}
}
end

defmodule IRVE do
  @doc """
  Builds a sample row following the static IRVE schema.

  The row is a map with string keys, assembled from groups of related fields.

  See:
  - https://schema.data.gouv.fr/etalab/schema-irve-statique/
  """
  def generate_row do
    [
      operator_fields(),
      station_fields(),
      charging_point_fields(),
      pricing_and_access_fields(),
      connection_and_metadata_fields()
    ]
    |> Enum.reduce(%{}, fn group, row -> Map.merge(row, group) end)
  end

  # Fields describing the "amenageur", the operator and the brand.
  defp operator_fields do
    %{
      "nom_amenageur" => "Métropole de Nulle Part",
      "siren_amenageur" => "123456782",
      "contact_amenageur" => "[email protected]",
      "nom_operateur" => "Opérateur de Charge",
      "contact_operateur" => "[email protected]",
      "telephone_operateur" => "0199456782",
      "nom_enseigne" => "Réseau de recharge"
    }
  end

  # Fields identifying and locating the station.
  defp station_fields do
    %{
      "id_station_itinerance" => "FRPAN99P12345678",
      "id_station_local" => "station_001",
      "nom_station" => "Ma Station",
      "implantation_station" => "Lieu de ma station",
      "adresse_station" => "26 rue des écluses, 17430 Champdolent",
      "code_insee_commune" => "17085",
      "coordonneesXY" => "[-0.799141,45.91914]",
      "nbre_pdc" => 1
    }
  end

  # Fields identifying the charging point ("pdc"), its power and its plug types.
  defp charging_point_fields do
    %{
      "id_pdc_itinerance" => "FRPAN99E12345678",
      "id_pdc_local" => "pdc_001",
      "puissance_nominale" => 22,
      "prise_type_ef" => false,
      "prise_type_2" => true,
      "prise_type_combo_ccs" => false,
      "prise_type_chademo" => false,
      "prise_type_autre" => false
    }
  end

  # Fields about payment, pricing, opening hours and access conditions.
  defp pricing_and_access_fields do
    %{
      "gratuit" => false,
      "paiement_acte" => true,
      "paiement_cb" => true,
      "paiement_autre" => true,
      "tarification" => "2,50€ / 30min puis 0,025€ / minute",
      "condition_acces" => "Accès libre",
      "reservation" => false,
      "horaires" => "24/7",
      "accessibilite_pmr" => "Accessible mais non réservé PMR",
      "restriction_gabarit" => "Hauteur maximale 2.30m",
      "station_deux_roues" => false
    }
  end

  # Remaining fields: grid connection, dates, free-form notes and cable flag.
  defp connection_and_metadata_fields do
    %{
      "raccordement" => "Direct",
      "num_pdl" => "12345678912345",
      "date_mise_en_service" => "2024-10-02",
      "observations" => "Station située au niveau -1 du parking",
      "date_maj" => "2024-10-17",
      "cable_t2_attache" => false
    }
  end
end
end
65 changes: 65 additions & 0 deletions apps/transport/test/transport/irve/irve_data_frame_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
defmodule Transport.IRVE.DataFrameTest do
  use ExUnit.Case, async: true
  doctest Transport.IRVE.DataFrame

  test "schema content" do
    # Only the first field is checked, as a smoke test that the schema file is read and decoded.
    first_field =
      Transport.IRVE.StaticIRVESchema.schema_content()
      |> Map.fetch!("fields")
      |> List.first()

    assert Map.take(first_field, ["name", "type"]) == %{"name" => "nom_amenageur", "type" => "string"}
  end

  test "dataframe roundtrip (encode + decode)" do
    csv_body =
      [DB.Factory.IRVE.generate_row()]
      |> CSV.encode(headers: true)
      |> Enum.join()

    decoded_rows =
      csv_body
      |> Transport.IRVE.DataFrame.dataframe_from_csv_body!()
      |> Explorer.DataFrame.to_rows()

    # Same values as the factory row, except that the two date columns are expected as `Date`s.
    expected_row = %{
      "nom_amenageur" => "Métropole de Nulle Part",
      "siren_amenageur" => "123456782",
      "contact_amenageur" => "[email protected]",
      "nom_operateur" => "Opérateur de Charge",
      "contact_operateur" => "[email protected]",
      "telephone_operateur" => "0199456782",
      "nom_enseigne" => "Réseau de recharge",
      "id_station_itinerance" => "FRPAN99P12345678",
      "id_station_local" => "station_001",
      "nom_station" => "Ma Station",
      "implantation_station" => "Lieu de ma station",
      "adresse_station" => "26 rue des écluses, 17430 Champdolent",
      "code_insee_commune" => "17085",
      "coordonneesXY" => "[-0.799141,45.91914]",
      "nbre_pdc" => 1,
      "id_pdc_itinerance" => "FRPAN99E12345678",
      "id_pdc_local" => "pdc_001",
      "puissance_nominale" => 22,
      "prise_type_ef" => false,
      "prise_type_2" => true,
      "prise_type_combo_ccs" => false,
      "prise_type_chademo" => false,
      "prise_type_autre" => false,
      "gratuit" => false,
      "paiement_acte" => true,
      "paiement_cb" => true,
      "paiement_autre" => true,
      "tarification" => "2,50€ / 30min puis 0,025€ / minute",
      "condition_acces" => "Accès libre",
      "reservation" => false,
      "horaires" => "24/7",
      "accessibilite_pmr" => "Accessible mais non réservé PMR",
      "restriction_gabarit" => "Hauteur maximale 2.30m",
      "station_deux_roues" => false,
      "raccordement" => "Direct",
      "num_pdl" => "12345678912345",
      "date_mise_en_service" => ~D[2024-10-02],
      "observations" => "Station située au niveau -1 du parking",
      "date_maj" => ~D[2024-10-17],
      "cable_t2_attache" => false
    }

    assert decoded_rows == [expected_row]
  end
end
3 changes: 3 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"appsignal": {:hex, :appsignal, "2.12.1", "08cbb95b8bd3b2cc8116a78df2f51df4e0815cb9fdab55b8ac13adba25fe9187", [:make, :mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:decorator, "~> 1.2.3 or ~> 1.3", [hex: :decorator, repo: "hexpm", optional: false]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "985cc9270c41a858344b2b3c5137ac9b848756e66a909215fae5b9e093fc8c66"},
"appsignal_phoenix": {:hex, :appsignal_phoenix, "2.4.1", "94c67067373ccb73245edca73d1c0d8108d89454b4277d2fb6c6fec387c0fd12", [:mix], [{:appsignal, ">= 2.11.0 and < 3.0.0", [hex: :appsignal, repo: "hexpm", optional: false]}, {:appsignal_plug, ">= 2.0.15 and < 3.0.0", [hex: :appsignal_plug, repo: "hexpm", optional: false]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.11 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:phoenix_live_view, "~> 0.9 or ~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e1d35768bd93e4c35cb93ce2f7994287b168a084fac24287388c8d2f9b2a20fd"},
"appsignal_plug": {:hex, :appsignal_plug, "2.0.15", "758a8a78944878e8461bbc77ca86219121a56f4299c6d79940ab083cf9afea00", [:mix], [{:appsignal, ">= 2.7.6 and < 3.0.0", [hex: :appsignal, repo: "hexpm", optional: false]}, {:plug, ">= 1.1.0", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "1c6059049e2081e808aaef04e2b9917e06277f61a35a0e103db860d08cbc41f1"},
"aws_signature": {:hex, :aws_signature, "0.3.2", "adf33bc4af00b2089b7708bf20e3246f09c639a905a619b3689f0a0a22c3ef8f", [:rebar3], [], "hexpm", "b0daf61feb4250a8ab0adea60db3e336af732ff71dd3fb22e45ae3dcbd071e44"},
"bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
"bypass": {:hex, :bypass, "2.1.0", "909782781bf8e20ee86a9cabde36b259d44af8b9f38756173e8f5e2e1fabb9b1", [:mix], [{:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "d9b5df8fa5b7a6efa08384e9bbecfe4ce61c77d28a4282f79e02f1ef78d96b80"},
"cachex": {:hex, :cachex, "3.6.0", "14a1bfbeee060dd9bec25a5b6f4e4691e3670ebda28c8ba2884b12fe30b36bf8", [:mix], [{:eternal, "~> 1.2", [hex: :eternal, repo: "hexpm", optional: false]}, {:jumper, "~> 1.0", [hex: :jumper, repo: "hexpm", optional: false]}, {:sleeplocks, "~> 1.1", [hex: :sleeplocks, repo: "hexpm", optional: false]}, {:unsafe, "~> 1.0", [hex: :unsafe, repo: "hexpm", optional: false]}], "hexpm", "ebf24e373883bc8e0c8d894a63bbe102ae13d918f790121f5cfe6e485cc8e2e2"},
Expand Down Expand Up @@ -50,6 +51,7 @@
"exactor": {:hex, :exactor, "2.2.4", "5efb4ddeb2c48d9a1d7c9b465a6fffdd82300eb9618ece5d34c3334d5d7245b1", [:mix], [], "hexpm", "1222419f706e01bfa1095aec9acf6421367dcfab798a6f67c54cf784733cd6b5"},
"excoveralls": {:hex, :excoveralls, "0.18.2", "86efd87a0676a3198ff50b8c77620ea2f445e7d414afa9ec6c4ba84c9f8bdcc2", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "230262c418f0de64077626a498bd4fdf1126d5c2559bb0e6b43deac3005225a4"},
"exjsx": {:hex, :exjsx, "4.0.0", "60548841e0212df401e38e63c0078ec57b33e7ea49b032c796ccad8cde794b5c", [:mix], [{:jsx, "~> 2.8.0", [hex: :jsx, repo: "hexpm", optional: false]}], "hexpm", "32e95820a97cffea67830e91514a2ad53b888850442d6d395f53a1ac60c82e07"},
"explorer": {:hex, :explorer, "0.10.0", "ba690afb59fce81746a1b6c9d25294aabcb9bae783ceeb43f5fd4834e1e16d78", [:mix], [{:adbc, "~> 0.1", [hex: :adbc, repo: "hexpm", optional: true]}, {:aws_signature, "~> 0.3", [hex: :aws_signature, repo: "hexpm", optional: false]}, {:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:decimal, "~> 2.1", [hex: :decimal, repo: "hexpm", optional: false]}, {:flame, "~> 0.3", [hex: :flame, repo: "hexpm", optional: true]}, {:fss, "~> 0.1", [hex: :fss, repo: "hexpm", optional: false]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}, {:rustler, "~> 0.34.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.7", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}, {:table, "~> 0.1.2", [hex: :table, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1 or ~> 4.0.0", [hex: :table_rex, repo: "hexpm", optional: false]}], "hexpm", "874b6af1f711186f391b507af293995f89311821486e01cbca4c99777ff20864"},
"expo": {:hex, :expo, "0.5.2", "beba786aab8e3c5431813d7a44b828e7b922bfa431d6bfbada0904535342efe2", [:mix], [], "hexpm", "8c9bfa06ca017c9cb4020fabe980bc7fdb1aaec059fd004c2ab3bff03b1c599c"},
"exvcr": {:hex, :exvcr, "0.15.1", "772db4d065f5136c6a984c302799a79e4ade3e52701c95425fa2229dd6426886", [:mix], [{:exactor, "~> 2.2", [hex: :exactor, repo: "hexpm", optional: false]}, {:exjsx, "~> 4.0", [hex: :exjsx, repo: "hexpm", optional: false]}, {:finch, "~> 0.16", [hex: :finch, repo: "hexpm", optional: true]}, {:httpoison, "~> 1.0 or ~> 2.0", [hex: :httpoison, repo: "hexpm", optional: true]}, {:httpotion, "~> 3.1", [hex: :httpotion, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:meck, "~> 0.8", [hex: :meck, repo: "hexpm", optional: false]}], "hexpm", "de4fc18b1d672d9b72bc7468735e19779aa50ea963a1f859ef82cd9e294b13e3"},
"file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", "6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"},
Expand Down Expand Up @@ -113,6 +115,7 @@
"recon": {:hex, :recon, "2.5.5", "c108a4c406fa301a529151a3bb53158cadc4064ec0c5f99b03ddb8c0e4281bdf", [:mix, :rebar3], [], "hexpm", "632a6f447df7ccc1a4a10bdcfce71514412b16660fe59deca0fcf0aa3c054404"},
"remote_ip": {:hex, :remote_ip, "1.2.0", "fb078e12a44414f4cef5a75963c33008fe169b806572ccd17257c208a7bc760f", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "2ff91de19c48149ce19ed230a81d377186e4412552a597d6a5137373e5877cb7"},
"req": {:hex, :req, "0.4.14", "103de133a076a31044e5458e0f850d5681eef23dfabf3ea34af63212e3b902e2", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0 or ~> 0.3.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "2ddd3d33f9ab714ced8d3c15fd03db40c14dbf129003c4a3eb80fac2cc0b1b08"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"},
"saxy": {:hex, :saxy, "1.5.0", "0141127f2d042856f135fb2d94e0beecda7a2306f47546dbc6411fc5b07e28bf", [:mix], [], "hexpm", "ea7bb6328fbd1f2aceffa3ec6090bfb18c85aadf0f8e5030905e84235861cf89"},
"scrivener": {:hex, :scrivener, "2.7.2", "1d913c965ec352650a7f864ad7fd8d80462f76a32f33d57d1e48bc5e9d40aba2", [:mix], [], "hexpm", "7866a0ec4d40274efbee1db8bead13a995ea4926ecd8203345af8f90d2b620d9"},
"scrivener_ecto": {:hex, :scrivener_ecto, "2.7.0", "cf64b8cb8a96cd131cdbcecf64e7fd395e21aaa1cb0236c42a7c2e34b0dca580", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:scrivener, "~> 2.4", [hex: :scrivener, repo: "hexpm", optional: false]}], "hexpm", "e809f171687806b0031129034352f5ae44849720c48dd839200adeaf0ac3e260"},
Expand Down
11 changes: 11 additions & 0 deletions scripts/irve/data-frame.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Fetches a sample IRVE resource and prints it once loaded as a typed DataFrame.
#
# Sample: Electra dataset (mid-sized)
# - dataset: https://www.data.gouv.fr/fr/datasets/623ca46c13130c3228abd018/
# - resource: https://www.data.gouv.fr/fr/datasets/623ca46c13130c3228abd018/#/resources/e9bb3424-77cd-40ba-8bbd-5a19362d0365

sample_url = "https://www.data.gouv.fr/fr/datasets/r/e9bb3424-77cd-40ba-8bbd-5a19362d0365"

# Note: cached in development if you set `irve_consolidation_caching: true` in `dev.secret.exs`
response = Transport.IRVE.Fetcher.get!(sample_url, compressed: false, decode_body: false)

# Assertive match: the script stops here unless the response is a 200.
%Req.Response{status: 200, body: body} = response

body
|> Transport.IRVE.DataFrame.dataframe_from_csv_body!()
|> IO.inspect(IEx.inspect_opts())