From 7183a7947e2e9a64c6f9302410885eeedb60f90e Mon Sep 17 00:00:00 2001 From: murilo Date: Sun, 6 Sep 2020 21:36:07 -0300 Subject: [PATCH] Fix sensitive data columns --- .../04-create-trusted-data-order-Copy1.ipynb | 10 +++++----- dev/docker-volume/src/DataProcessor.py | 2 +- .../__pycache__/DataProcessor.cpython-38.pyc | Bin 4302 -> 4281 bytes 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/docker-volume/notebooks/04-create-trusted-data-order-Copy1.ipynb b/dev/docker-volume/notebooks/04-create-trusted-data-order-Copy1.ipynb index 0f4b24d..1bf1515 100644 --- a/dev/docker-volume/notebooks/04-create-trusted-data-order-Copy1.ipynb +++ b/dev/docker-volume/notebooks/04-create-trusted-data-order-Copy1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -70,7 +70,7 @@ "pyspark.sql.session.SparkSession" ] }, - "execution_count": 26, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } diff --git a/dev/docker-volume/src/DataProcessor.py b/dev/docker-volume/src/DataProcessor.py index 48b9f38..dc48e14 100644 --- a/dev/docker-volume/src/DataProcessor.py +++ b/dev/docker-volume/src/DataProcessor.py @@ -123,7 +123,7 @@ def create_trusted_order(spark:SparkSession): output_path = TRUSTED_DATA_PATH / 'order' # anonymize sensitive data by dropping columns - sensitive_data_columns = ['cpf', 'customer_name', 'consumer_customer_name', 'consumer_customer_phone_number'] + sensitive_data_columns = ['order_cpf', 'order_customer_name', 'consumer_customer_name', 'consumer_customer_phone_number'] tmp = tmp.drop(*sensitive_data_columns) tmp.write.parquet(str(output_path)) diff --git a/dev/docker-volume/src/__pycache__/DataProcessor.cpython-38.pyc b/dev/docker-volume/src/__pycache__/DataProcessor.cpython-38.pyc index fbcb3a0556ca9cc3f073cfba2f6ab1364f082758..2847d51ce954e314c5fbfc947975ab873c607b93 100644 GIT binary patch delta 100 zcmX@7xKoigl$V!_0SJN`LgQUG^7imBPMbW7r**PEZ<25nXMRyiYEgW0L0XhBggJQz yZvZ3P~dFn$e2#=^<<0{#He6CGFp delta 120 zcmdm~cutWwl$V!_0SHdihsK+4Px#