Skip to content

Commit

Permalink
Shift parsing burden to postgres identifier utility
Browse files Browse the repository at this point in the history
  • Loading branch information
msfstef committed Oct 14, 2024
1 parent 52dcc96 commit 3fd74f0
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 33 deletions.
39 changes: 6 additions & 33 deletions packages/sync-service/lib/electric/plug/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ defmodule Electric.Plug.Utils do
## Examples
iex> Electric.Plug.Utils.parse_columns_param("")
{:error, "Must specify at least one column"}
{:error, "Invalid zero-length delimited identifier"}
iex> Electric.Plug.Utils.parse_columns_param("foo,")
{:error, "Invalid empty column provided"}
{:error, "Invalid zero-length delimited identifier"}
iex> Electric.Plug.Utils.parse_columns_param("id")
{:ok, ["id"]}
iex> Electric.Plug.Utils.parse_columns_param("beta,alpha")
Expand All @@ -24,29 +24,18 @@ defmodule Electric.Plug.Utils do
iex> Electric.Plug.Utils.parse_columns_param(~S|\"fo\"\"o\",bar|)
{:ok, ["bar", ~S|fo"o|]}
iex> Electric.Plug.Utils.parse_columns_param(~S|"id,"name"|)
{:error, ~S|Invalid column, unmatched quote: "id|}
{:error, ~S|Invalid unquoted identifier contains special characters: "id|}
"""
@spec parse_columns_param(binary()) :: {:ok, [String.t(), ...]} | {:error, term()}
def parse_columns_param("") do
{:error, "Must specify at least one column"}
end

def parse_columns_param(columns) when is_binary(columns) do
columns
# Split by commas that are not inside quotes
|> String.split(~r/,(?=(?:[^"]*"[^"]*")*[^"]*$)/)
|> Enum.reduce_while([], fn column, acc ->
casted_column = maybe_cast_quoted_identifier(column)

cond do
contains_unescaped_quote?(casted_column) ->
{:halt, {:error, "Invalid column, unmatched quote: #{casted_column}"}}

String.trim(casted_column) == "" ->
{:halt, {:error, "Invalid empty column provided"}}

true ->
{:cont, [unescape_quotes(casted_column) | acc]}
case Electric.Postgres.Identifiers.parse(column) do
{:ok, casted_column} -> {:cont, [casted_column | acc]}
{:error, reason} -> {:halt, {:error, reason}}
end
end)
|> then(fn result ->
Expand All @@ -58,20 +47,4 @@ defmodule Electric.Plug.Utils do
end
end)
end

# Returns true when `string` contains a double quote that is not part of an
# escaped `""` pair.
#
# Escaped pairs are removed first; any quote still left over is unescaped. A
# lookaround regex such as `(?<!")"(?!")` is not enough here because it never
# matches inside a run of three (or any odd number > 1) of quotes.
defp contains_unescaped_quote?(string) do
  string
  |> String.replace(~S|""|, "")
  |> String.contains?(~S|"|)
end

# Strips the surrounding double quotes from a quoted identifier; anything else
# is passed to `Electric.Postgres.Identifiers.downcase/1`.
defp maybe_cast_quoted_identifier(string) do
  quoted = ~r/^"(.*)"$/

  if string =~ quoted do
    String.replace(string, quoted, "\\1")
  else
    # if identifier is not quoted, downcase it like Postgres would
    Electric.Postgres.Identifiers.downcase(string)
  end
end

# Collapses every escaped `""` pair in `string` into a single `"`.
defp unescape_quotes(string), do: String.replace(string, ~S|""|, ~S|"|)
end
66 changes: 66 additions & 0 deletions packages/sync-service/lib/electric/postgres/identifiers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,72 @@ defmodule Electric.Postgres.Identifiers do
@namedatalen 63
@ascii_downcase ?a - ?A

@doc """
Parses a PostgreSQL identifier.

A double-quoted (delimited) identifier has its surrounding quotes removed and
every doubled internal quote (`""`) unescaped to a single `"`; its case is
preserved. Any other input is treated as an unquoted identifier: it is
validated and then downcased with `downcase/3`, to which `truncate` and
`single_byte_encoding` are passed through unchanged.

Returns `{:ok, identifier}` on success, or `{:error, reason}` where `reason`
is a human-readable message.

## Examples

    iex> Electric.Postgres.Identifiers.parse("FooBar")
    {:ok, "foobar"}
    iex> Electric.Postgres.Identifiers.parse(~S|"FooBar"|)
    {:ok, "FooBar"}
    iex> Electric.Postgres.Identifiers.parse(~S|Foo"Bar"|)
    {:error, ~S|Invalid unquoted identifier contains special characters: Foo"Bar"|}
    iex> Electric.Postgres.Identifiers.parse(" ")
    {:error, "Invalid unquoted identifier contains special characters:  "}
    iex> Electric.Postgres.Identifiers.parse("foob@r")
    {:error, ~S|Invalid unquoted identifier contains special characters: foob@r|}
    iex> Electric.Postgres.Identifiers.parse(~S|"Foo"Bar"|)
    {:error, ~S|Invalid identifier with unescaped quote: Foo"Bar|}
    iex> Electric.Postgres.Identifiers.parse(~S|""|)
    {:error, "Invalid zero-length delimited identifier"}
    iex> Electric.Postgres.Identifiers.parse("")
    {:error, "Invalid zero-length delimited identifier"}
    iex> Electric.Postgres.Identifiers.parse(~S|"|)
    {:error, ~S|Invalid unquoted identifier contains special characters: "|}
    iex> Electric.Postgres.Identifiers.parse(~S|" "|)
    {:ok, " "}
    iex> Electric.Postgres.Identifiers.parse(~S|"Foo""Bar"|)
    {:ok, ~S|Foo"Bar|}
"""
@spec parse(binary(), boolean(), boolean()) :: {:ok, binary()} | {:error, term()}
def parse(ident, truncate \\ false, single_byte_encoding \\ false) when is_binary(ident) do
  # A lone `"` both starts and ends with a quote, so require at least two bytes
  # before treating the input as a delimited identifier; otherwise it would be
  # sliced to "" and misreported as a zero-length delimited identifier.
  quoted? =
    byte_size(ident) >= 2 and String.starts_with?(ident, ~S|"|) and
      String.ends_with?(ident, ~S|"|)

  if quoted? do
    ident
    |> String.slice(1..-2//1)
    |> parse_quoted_identifier()
  else
    parse_unquoted_identifier(ident, truncate, single_byte_encoding)
  end
end

# Validates and unescapes the body of a delimited identifier (the text between
# the surrounding quotes). An empty body is rejected.
defp parse_quoted_identifier(""), do: {:error, "Invalid zero-length delimited identifier"}

defp parse_quoted_identifier(ident) do
  if contains_unescaped_quote?(ident) do
    {:error, "Invalid identifier with unescaped quote: #{ident}"}
  else
    unescaped = unescape_quotes(ident)
    {:ok, unescaped}
  end
end

# Validates an unquoted identifier and downcases it via `downcase/3`.
#
# An empty identifier is reported with the same error as an empty delimited one.
defp parse_unquoted_identifier("", _truncate, _single_byte_encoding),
  do: parse_quoted_identifier("")

defp parse_unquoted_identifier(ident, truncate, single_byte_encoding) do
  if valid_unquoted_identifier?(ident) do
    {:ok, downcase(ident, truncate, single_byte_encoding)}
  else
    {:error, "Invalid unquoted identifier contains special characters: #{ident}"}
  end
end

# Returns true when `string` contains a double quote that is not part of an
# escaped `""` pair.
#
# Escaped pairs are stripped first, so any quote that remains is unescaped.
# The previous lookaround regex `(?<!")"(?!")` could not see the stray quote in
# an odd-length run such as `"""`, because every quote in the run has another
# quote next to it.
defp contains_unescaped_quote?(string) do
  string
  |> String.replace(~S|""|, "")
  |> String.contains?(~S|"|)
end

# Replaces each escaped `""` pair in `string` with a single `"`.
defp unescape_quotes(string), do: String.replace(string, ~S|""|, ~S|"|)

# Returns true when `identifier` starts with an ASCII letter or underscore and
# otherwise contains only ASCII letters, digits and underscores.
#
# Anchored with `\A`/`\z` rather than `^`/`$`: `$` also matches just before a
# trailing newline, which would let "foo\n" through as a valid identifier.
defp valid_unquoted_identifier?(identifier) do
  Regex.match?(~r/\A[a-zA-Z_][a-zA-Z0-9_]*\z/, identifier)
end

@doc """
Downcase the identifier and truncate if necessary, using
PostgreSQL's algorithm for downcasing.
Expand Down

0 comments on commit 3fd74f0

Please sign in to comment.