Skip to content

Commit

Permalink
solved 2 errors in linked in and youtube, made start of making releas…
Browse files Browse the repository at this point in the history
…e more smooth
  • Loading branch information
trbKnl committed Jan 16, 2024
1 parent fe6e9d3 commit 5920df9
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 11 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"start": "concurrently 'npm run start:py' 'npm run start:app'",
"build": "npm run build:py && npm run build:app && npm run build:css",
"archive": "cd build && zip -r ../release.zip .",
"archive_output": "cd build && zip -r ../release-$PLATFORM.zip .",
"release": "npm run build && npm run archive",
"test": "react-scripts test",
"lint": "npm run fix:ts",
Expand Down
Binary file modified public/port-0.0.0-py3-none-any.whl
Binary file not shown.
10 changes: 10 additions & 0 deletions release-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Release all individual platforms.
#
# Only the first step is implemented so far: the line that starts the combined
# `platforms = [ ... ]` list in script.py is commented out in place.
# The path below is relative, so run this script from the repository root.

# File whose platform list is rewritten.
# NOTE: a shell assignment must not have spaces around `=`; with spaces bash
# tries to execute a command named `script_location` and the variable stays unset.
script_location=./src/framework/processing/py/port/script.py

# sed search pattern (`\[` matches a literal opening bracket) and its
# replacement, which is the same text prefixed with `#`.
all_platforms='platforms = \[ ("LinkedIn", extract_linkedin, linkedin.validate), ("Insta'
all_platforms_commented_out='#platforms = \[ ("LinkedIn", extract_linkedin, linkedin.validate), ("Insta'

# comment out all platforms in sequence
# The path is quoted so an empty or space-containing value cannot silently
# change the arguments sed receives.
sed -i "s/$all_platforms/$all_platforms_commented_out/g" "$script_location"
Binary file modified src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
13 changes: 8 additions & 5 deletions src/framework/processing/py/port/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def company_follows_to_df(linkedin_zip: str) -> pd.DataFrame:
'Company Follows.csv'
"""
filename = "Company Follows.csv"

b = unzipddp.extract_file_from_zip(linkedin_zip, filename)
df = unzipddp.read_csv_from_bytes_to_df(b)

Expand All @@ -109,11 +110,13 @@ def member_follows_to_df(linkedin_zip: str) -> pd.DataFrame:
"""
filename = "Member_Follows.csv"
b = unzipddp.extract_file_from_zip(linkedin_zip, filename)

# remove zero or more of any char (including line breaks), non-greedy, up to and including 2 consecutive line breaks
b = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', b.read()))

df = unzipddp.read_csv_from_bytes_to_df(b)
df = pd.DataFrame()
try:
# remove zero or more of any char (including line breaks), non-greedy, up to and including 2 consecutive line breaks
b = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', b.read()))
df = unzipddp.read_csv_from_bytes_to_df(b)
except:
pass

return df

Expand Down
8 changes: 8 additions & 0 deletions src/framework/processing/py/port/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ def process(session_id):
("Twitter", extract_twitter, twitter.validate),
]

#platforms = [ ("LinkedIn", extract_linkedin, linkedin.validate), ]
#platforms = [ ("Instagram", extract_instagram, instagram.validate), ]
#platforms = [ ("Chrome", extract_chrome, chrome.validate), ]
#platforms = [ ("Facebook", extract_facebook, facebook.validate), ]
#platforms = [ ("Youtube", extract_youtube, youtube.validate), ]
#platforms = [ ("TikTok", extract_tiktok, tiktok.validate), ]
#platforms = [ ("Twitter", extract_twitter, twitter.validate), ]

# progress in %
subflows = len(platforms)
steps = 2
Expand Down
12 changes: 6 additions & 6 deletions src/framework/processing/py/port/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ def my_comments_to_df(youtube_zip: str, validation: ValidateInput) -> pd.DataFra
return df



# Extract Watch later.csv
def watch_later_to_df(youtube_zip: str) -> pd.DataFrame:
"""
Expand All @@ -203,13 +202,14 @@ def watch_later_to_df(youtube_zip: str) -> pd.DataFrame:
"""

ratings_bytes = unzipddp.extract_file_from_zip(youtube_zip, "Watch later.csv")
df = pd.DataFrame()

# remove the first 3 lines from the .csv
#ratings_bytes = io.BytesIO(re.sub(b'^(.*)\n(.*)\n\n', b'', ratings_bytes.read()))
ratings_bytes = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', ratings_bytes.read()))

df = unzipddp.read_csv_from_bytes_to_df(ratings_bytes)
try:
# remove the first 3 lines from the .csv
#ratings_bytes = io.BytesIO(re.sub(b'^(.*)\n(.*)\n\n', b'', ratings_bytes.read()))
ratings_bytes = io.BytesIO(re.sub(b'^((?s).)*?\n\n', b'', ratings_bytes.read()))

df = unzipddp.read_csv_from_bytes_to_df(ratings_bytes)
df['Video-ID'] = 'https://www.youtube.com/watch?v=' + df['Video-ID']
except Exception as e:
logger.debug("Exception was caught: %s", e)
Expand Down

0 comments on commit 5920df9

Please sign in to comment.