Skip to content

Commit

Permalink
chore: Validate table names locally first (#1873)
Browse files Browse the repository at this point in the history
Fixes #1814

We still go to PG to validate the table as we need its OID, so this is
not strictly necessary, but since we have validation logic we might as
well validate things locally to avoid going to PG for clearly invalid
table identifiers?

This also fixes the accepted characters by PG for identifiers [based on
the
docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS)
as it was excluding some unicode chars and dollar signs before.
  • Loading branch information
msfstef authored Oct 22, 2024
1 parent 4d9b35a commit 696eb9d
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 8 deletions.
5 changes: 5 additions & 0 deletions .changeset/slow-panthers-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@core/sync-service": patch
---

Validate table names locally first before going to PG to save resources
87 changes: 86 additions & 1 deletion packages/sync-service/lib/electric/postgres/identifiers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,36 @@ defmodule Electric.Postgres.Identifiers do
@namedatalen 63
@ascii_downcase ?a - ?A

defmodule StringSplitter do
@moduledoc """
Utility module for splitting strings on a schema delimiter
"""

@doc """
Split a string on a schema delimiter, only if the delimiter is not
inside quotes, returning a list of strings
"""
@spec split_outside_quotes(binary()) :: [binary(), ...]
def split_outside_quotes(string) do
split_outside_quotes(string, "", [], false)
end

# Return the accumulated parts
defp split_outside_quotes("", current, acc, _in_quotes), do: acc ++ [current]

# If we hit a period and we're not in quotes, split here
defp split_outside_quotes(<<".", rest::binary>>, current, acc, false),
do: split_outside_quotes(rest, "", acc ++ [current], false)

# Toggle the in_quotes flag when encountering a quote
defp split_outside_quotes(<<"\"", rest::binary>>, current, acc, in_quotes),
do: split_outside_quotes(rest, current <> "\"", acc, !in_quotes)

# Continue processing, accumulating characters
defp split_outside_quotes(<<char, rest::binary>>, current, acc, in_quotes),
do: split_outside_quotes(rest, current <> <<char>>, acc, in_quotes)
end

@doc """
Parse a PostgreSQL identifier, removing quotes if present and escaping internal ones
and downcasing the identifier otherwise.
Expand All @@ -10,22 +40,31 @@ defmodule Electric.Postgres.Identifiers do
iex> Electric.Postgres.Identifiers.parse("FooBar")
{:ok, "foobar"}
iex> Electric.Postgres.Identifiers.parse(~S|"FooBar"|)
{:ok, "FooBar"}
iex> Electric.Postgres.Identifiers.parse(~S|Foo"Bar"|)
{:error, ~S|Invalid unquoted identifier contains special characters: Foo"Bar"|}
iex> Electric.Postgres.Identifiers.parse(~S| |)
{:error, ~S|Invalid unquoted identifier contains special characters: |}
iex> Electric.Postgres.Identifiers.parse("foob@r")
{:error, ~S|Invalid unquoted identifier contains special characters: foob@r|}
iex> Electric.Postgres.Identifiers.parse(~S|"Foo"Bar"|)
{:error, ~S|Invalid identifier with unescaped quote: Foo"Bar|}
iex> Electric.Postgres.Identifiers.parse(~S|""|)
{:error, "Invalid zero-length delimited identifier"}
iex> Electric.Postgres.Identifiers.parse("")
{:error, "Invalid zero-length delimited identifier"}
iex> Electric.Postgres.Identifiers.parse(~S|" "|)
{:ok, " "}
iex> Electric.Postgres.Identifiers.parse(~S|"Foo""Bar"|)
{:ok, ~S|Foo"Bar|}
"""
Expand Down Expand Up @@ -65,7 +104,53 @@ defmodule Electric.Postgres.Identifiers do
end

defp valid_unquoted_identifier?(identifier) do
Regex.match?(~r/^[a-zA-Z_][a-zA-Z0-9_]*$/, identifier)
Regex.match?(~r/^[\pL_][\pL\pM_0-9$]*$]*$/u, identifier)
end

@doc """
Parse a PostgreSQL relation identifier
## Examples
iex> Electric.Postgres.Identifiers.parse_relation("foo")
{:ok, {"public", "foo"}}
iex> Electric.Postgres.Identifiers.parse_relation("foo.bar")
{:ok, {"foo", "bar"}}
iex> Electric.Postgres.Identifiers.parse_relation(~S|"foo"."bar"|)
{:ok, {"foo", "bar"}}
iex> Electric.Postgres.Identifiers.parse_relation(~S|"foo.woah"."bar"|)
{:ok, {"foo.woah", "bar"}}
iex> Electric.Postgres.Identifiers.parse_relation(~S|"foo".bar|)
{:ok, {"foo", "bar"}}
iex> Electric.Postgres.Identifiers.parse_relation(~S|"foo"."bar|)
{:error, ~S|Invalid unquoted identifier contains special characters: "bar|}
iex> Electric.Postgres.Identifiers.parse_relation("foo.bar.baz")
{:error, "Invalid relation identifier, too many delimiters: foo.bar.baz"}
"""
@spec parse_relation(binary()) :: {:ok, Electric.relation()} | {:error, term()}
def parse_relation(ident) do
case StringSplitter.split_outside_quotes(ident) do
[table] ->
case parse(table) do
{:ok, parsed} -> {:ok, {"public", parsed}}
{:error, reason} -> {:error, reason}
end

[schema, table] ->
with {:ok, schema} <- parse(schema),
{:ok, table} <- parse(table) do
{:ok, {schema, table}}
end

_ ->
{:error, "Invalid relation identifier, too many delimiters: #{ident}"}
end
end

@doc """
Expand Down
9 changes: 5 additions & 4 deletions packages/sync-service/lib/electric/shapes/shape.ex
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,11 @@ defmodule Electric.Shapes.Shape do
end

defp validate_table(table, inspector) when is_binary(table) do
case Inspector.load_relation(table, inspector) do
# Parse identifier locally first to avoid hitting PG for invalid tables
with {:ok, _} <- Electric.Postgres.Identifiers.parse_relation(table),
{:ok, rel} <- Inspector.load_relation(table, inspector) do
{:ok, rel}
else
{:error, err} ->
case Regex.run(~r/.+ relation "(?<name>.+)" does not exist/, err, capture: :all_names) do
[table_name] ->
Expand All @@ -158,9 +162,6 @@ defmodule Electric.Shapes.Shape do
_ ->
{:error, {:root_table, [err]}}
end

{:ok, rel} ->
{:ok, rel}
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ defmodule Electric.Plug.DeleteShapePlugTest do

assert Jason.decode!(conn.resp_body) == %{
"root_table" => [
"invalid name syntax"
"Invalid zero-length delimited identifier"
]
}
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ defmodule Electric.Plug.ServeShapePlugTest do
assert Jason.decode!(conn.resp_body) == %{
"offset" => ["has invalid format"],
"root_table" => [
"invalid name syntax"
"Invalid zero-length delimited identifier"
]
}
end
Expand Down
2 changes: 1 addition & 1 deletion packages/sync-service/test/electric/shapes/shape_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ defmodule Electric.Shapes.ShapeTest do
end

test "errors on empty table name", %{inspector: inspector} do
{:error, {:root_table, ["ERROR 42602 (invalid_name) invalid name syntax"]}} =
{:error, {:root_table, ["Invalid zero-length delimited identifier"]}} =
Shape.new("", inspector: inspector)
end

Expand Down

0 comments on commit 696eb9d

Please sign in to comment.