diff --git a/docs/spark-queries.md b/docs/spark-queries.md index 188b05249bc4..6e6637e229d4 100644 --- a/docs/spark-queries.md +++ b/docs/spark-queries.md @@ -268,6 +268,18 @@ order by made_current_at; | 2019-02-09 16:32:47.336 | append | 57897183625154 | true | application_1520379288616_155055 | | 2019-02-08 03:47:55.948 | overwrite | 51792995261850 | true | application_1520379288616_152431 | +### Entries + +To show all the table's current manifest entries for both data and delete files. + +```sql +SELECT * FROM prod.db.table.entries; +``` + +| status | snapshot_id | sequence_number | file_sequence_number | data_file | readable_metrics | +| -- | -- | -- | -- | -- | -- | +| 2 | 57897183625154 | 0 | 0 | {"content":0,"file_path":"s3:/.../table/data/00047-25-833044d0-127b-415c-b874-038a4f978c29-00612.parquet","file_format":"PARQUET","spec_id":0,"record_count":15,"file_size_in_bytes":473,"column_sizes":{1:103},"value_counts":{1:15},"null_value_counts":{1:0},"nan_value_counts":{},"lower_bounds":{1:},"upper_bounds":{1:},"key_metadata":null,"split_offsets":[4],"equality_ids":null,"sort_order_id":0} | {"c1":{"column_size":103,"value_count":15,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":3}} | + ### Files To show a table's current files: @@ -335,6 +347,18 @@ Note: 2. The partitions metadata table shows partitions with data files or delete files in the current snapshot. However, delete files are not applied, and so in some cases partitions may be shown even though all their data rows are marked deleted by delete files. +### Positional Delete Files + +To show all positional delete files from the current snapshot of table: + +```sql +SELECT * from prod.db.table.position_deletes; +``` + +| file_path | pos | row | spec_id | delete_file_path | +| -- | -- | -- | -- | -- | +| s3:/.../table/data/00042-3-a9aa8b24-20bc-4d56-93b0-6b7675782bb5-00001.parquet | 1 | 0 | 0 | s3:/.../table/data/00191-1933-25e9f2f3-d863-4a69-a5e1-f9aeeebe60bb-00001-deletes.parquet | + ### All Metadata Tables These tables are unions of the metadata tables specific to the current snapshot, and return metadata across all snapshots. @@ -357,6 +381,31 @@ SELECT * FROM prod.db.table.all_data_files; | 0|s3://.../dt=20210103/00000-0-26222098-032f-472b-8ea5-651a55b21210-00001.parquet| PARQUET|{20210103}| 14| 2444|{1 -> 94, 2 -> 17}|{1 -> 14, 2 -> 14}| {1 -> 0, 2 -> 0}| {}|{1 -> 1, 2 -> 20210103}|{1 -> 3, 2 -> 20210103}| null| [4]| null| 0| | 0|s3://.../dt=20210104/00000-0-a3bb1927-88eb-4f1c-bc6e-19076b0d952e-00001.parquet| PARQUET|{20210104}| 14| 2444|{1 -> 94, 2 -> 17}|{1 -> 14, 2 -> 14}| {1 -> 0, 2 -> 0}| {}|{1 -> 1, 2 -> 20210104}|{1 -> 3, 2 -> 20210104}| null| [4]| null| 0| +#### All Delete Files + +To show the table's delete files and each file's metadata from all the snapshots: + +```sql +SELECT * FROM prod.db.table.all_delete_files; +``` + +| content | file_path | file_format | spec_id | record_count | file_size_in_bytes | column_sizes | value_counts | null_value_counts | nan_value_counts | lower_bounds | upper_bounds | key_metadata | split_offsets | equality_ids | sort_order_id | readable_metrics | +| -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | +| 1 | s3:/.../table/data/00081-4-a9aa8b24-20bc-4d56-93b0-6b7675782bb5-00001-deletes.parquet | PARQUET | 0 | 1 | 1560 | {2147483545:46,2147483546:152} | {2147483545:1,2147483546:1} | {2147483545:0,2147483546:0} | {} | {2147483545:,2147483546:s3:/.../table/data/00000-0-f9709213-22ca-4196-8733-5cb15d2afeb9-00001.parquet} | {2147483545:,2147483546:s3:/.../table/data/00000-0-f9709213-22ca-4196-8733-5cb15d2afeb9-00001.parquet} | NULL | [4] | NULL | NULL | {"data":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"id":{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}} | +| 2 | s3:/.../table/data/00047-25-833044d0-127b-415c-b874-038a4f978c29-00612.parquet | PARQUET | 0 | 126506 | 28613985 | {100:135377,101:11314} | {100:126506,101:126506} | {100:105434,101:11} | {} | {100:0,101:17} | {100:404455227527,101:23} | NULL | NULL | [1] | 0 | {"id":{"column_size":135377,"value_count":126506,"null_value_count":105434,"nan_value_count":null,"lower_bound":0,"upper_bound":404455227527},"data":{"column_size":11314,"value_count":126506,"null_value_count": 11,"nan_value_count":null,"lower_bound":17,"upper_bound":23}} | + +#### All Entries + +To show the table's manifest entries from all the snapshots for both data and delete files: + +```sql +SELECT * FROM prod.db.table.all_entries; +``` + +| status | snapshot_id | sequence_number | file_sequence_number | data_file | readable_metrics | +| -- | -- | -- | -- | -- | -- | +| 2 | 57897183625154 | 0 | 0 | {"content":0,"file_path":"s3:/.../table/data/00047-25-833044d0-127b-415c-b874-038a4f978c29-00612.parquet","file_format":"PARQUET","spec_id":0,"record_count":15,"file_size_in_bytes":473,"column_sizes":{1:103},"value_counts":{1:15},"null_value_counts":{1:0},"nan_value_counts":{},"lower_bounds":{1:},"upper_bounds":{1:},"key_metadata":null,"split_offsets":[4],"equality_ids":null,"sort_order_id":0} | {"c1":{"column_size":103,"value_count":15,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":3}} | + #### All Manifests To show all of the table's manifest files: