Skip to content

Commit

Permalink
Fix sensitive data columns
Browse files (browse the repository at this point in the history)
  • Loading branch information
murilo committed Sep 7, 2020
1 parent 8ad2a94 commit 7183a79
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 31,
"metadata": {},
"outputs": [
{
Expand All @@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 32,
"metadata": {},
"outputs": [
{
Expand All @@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 33,
"metadata": {},
"outputs": [
{
Expand All @@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 34,
"metadata": {},
"outputs": [
{
Expand All @@ -70,7 +70,7 @@
"pyspark.sql.session.SparkSession"
]
},
"execution_count": 26,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
2 changes: 1 addition & 1 deletion dev/docker-volume/src/DataProcessor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -123,7 +123,7 @@ def create_trusted_order(spark:SparkSession):
output_path = TRUSTED_DATA_PATH / 'order'

# anonymize sensitive data by dropping columns
sensitive_data_columns = ['cpf', 'customer_name', 'consumer_customer_name', 'consumer_customer_phone_number']
sensitive_data_columns = ['order_cpf', 'order_customer_name', 'consumer_customer_name', 'consumer_customer_phone_number']
tmp = tmp.drop(*sensitive_data_columns)

tmp.write.parquet(str(output_path))
Expand Down
Binary file modified dev/docker-volume/src/__pycache__/DataProcessor.cpython-38.pyc
Binary file not shown.

0 comments on commit 7183a79

Please sign in to comment.