Solved 2 errors in LinkedIn and YouTube; made a start on making the release process smoother
d3i-infra · Jan 16, 2024 · 5920df9 · 5920df9
1 parent fe6e9d3
commit 5920df9
Show file tree

Hide file tree

Showing 7 changed files with 33 additions and 11 deletions.
diff --git a/package.json b/package.json
@@ -38,6 +38,7 @@
     "start": "concurrently 'npm run start:py' 'npm run start:app'",
     "build": "npm run build:py && npm run build:app && npm run build:css",
     "archive": "cd build && zip -r ../release.zip .",
+    "archive_output": "cd build && zip -r ../release-$PLATFORM.zip .",
     "release": "npm run build && npm run archive",
     "test": "react-scripts test",
     "lint": "npm run fix:ts",

diff --git a/public/port-0.0.0-py3-none-any.whl b/public/port-0.0.0-py3-none-any.whl
diff --git a/release-script.sh b/release-script.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# First step towards releasing each platform individually.
+# NOTE: bash assignments take no spaces around '='; the path is quoted when expanded.
+script_location=./src/framework/processing/py/port/script.py
+
+all_platforms='platforms = \[ ("LinkedIn", extract_linkedin, linkedin.validate), ("Insta'
+all_platforms_commented_out='#platforms = \[ ("LinkedIn", extract_linkedin, linkedin.validate), ("Insta'
+
+# Prefix the matching platforms assignment with '#' so it is commented out in place.
+sed -i "s/$all_platforms/$all_platforms_commented_out/g" "$script_location"
diff --git a/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl b/src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
diff --git a/src/framework/processing/py/port/linkedin.py b/src/framework/processing/py/port/linkedin.py
@@ -97,6 +97,7 @@ def company_follows_to_df(linkedin_zip: str) -> pd.DataFrame:
     'Company Follows.csv'
     """
     filename = "Company Follows.csv"
+
     b = unzipddp.extract_file_from_zip(linkedin_zip, filename)
     df = unzipddp.read_csv_from_bytes_to_df(b)
 
@@ -109,11 +110,13 @@ def member_follows_to_df(linkedin_zip: str) -> pd.DataFrame:
     """
     filename = "Member_Follows.csv"
     b = unzipddp.extract_file_from_zip(linkedin_zip, filename)
-
-    # remove zero or more any chars (including linebreaks) non greedy up to and including 2 consequetive line breaks
-    b = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', b.read()))
-
-    df = unzipddp.read_csv_from_bytes_to_df(b)
+    df = pd.DataFrame()
+    try:
+        # remove zero or more of any chars (including line breaks), non-greedy, up to and including 2 consecutive line breaks
+        b = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', b.read()))
+        df = unzipddp.read_csv_from_bytes_to_df(b)
+    except:
+        pass
 
     return df
 

diff --git a/src/framework/processing/py/port/script.py b/src/framework/processing/py/port/script.py
@@ -45,6 +45,14 @@ def process(session_id):
         ("Twitter", extract_twitter, twitter.validate),
     ]
 
+    #platforms = [ ("LinkedIn", extract_linkedin, linkedin.validate), ]
+    #platforms = [ ("Instagram", extract_instagram, instagram.validate), ]
+    #platforms = [ ("Chrome", extract_chrome, chrome.validate), ]
+    #platforms = [ ("Facebook", extract_facebook, facebook.validate), ]
+    #platforms = [ ("Youtube", extract_youtube, youtube.validate), ]
+    #platforms = [ ("TikTok", extract_tiktok, tiktok.validate), ]
+    #platforms = [ ("Twitter", extract_twitter, twitter.validate), ]
+
     # progress in %
     subflows = len(platforms)
     steps = 2

diff --git a/src/framework/processing/py/port/youtube.py b/src/framework/processing/py/port/youtube.py
@@ -192,7 +192,6 @@ def my_comments_to_df(youtube_zip: str, validation: ValidateInput) -> pd.DataFra
     return df
 
 
-
 # Extract Watch later.csv
 def watch_later_to_df(youtube_zip: str) -> pd.DataFrame:
     """
@@ -203,13 +202,14 @@ def watch_later_to_df(youtube_zip: str) -> pd.DataFrame:
     """
 
     ratings_bytes = unzipddp.extract_file_from_zip(youtube_zip, "Watch later.csv")
+    df = pd.DataFrame()
 
-    # remove the first 3 lines from the .csv
-    #ratings_bytes = io.BytesIO(re.sub(b'^(.*)\n(.*)\n\n', b'', ratings_bytes.read()))
-    ratings_bytes = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', ratings_bytes.read()))
-
-    df = unzipddp.read_csv_from_bytes_to_df(ratings_bytes)
     try:
+        # remove the first 3 lines from the .csv
+        #ratings_bytes = io.BytesIO(re.sub(b'^(.*)\n(.*)\n\n', b'', ratings_bytes.read()))
+        ratings_bytes = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', ratings_bytes.read()))
+
+        df = unzipddp.read_csv_from_bytes_to_df(ratings_bytes)
         df['Video-ID'] = 'https://www.youtube.com/watch?v=' + df['Video-ID']
     except Exception as e:
         logger.debug("Exception was caught:  %s", e)