# URL-redaction helpers introduced in tubeup/utils.py by this change.
# The caller shown in the same change (upload_ia in tubeup/TubeUp.py) passes
# the loaded info JSON to strip_ip_from_meta() and rewrites the JSON file
# when the returned flag is true.
import re
from urllib.parse import urlparse, parse_qs, parse_qsl, urlencode

# Matches a "/ip/<value>" path segment pair, e.g. "/ip/1.2.3.4".
_IP_PATH_RE = re.compile(r'/ip/[^/]+')

# Per-format fields that are checked for embedded IP addresses.
_FORMAT_URL_FIELDS = ('manifest_url', 'fragment_base_url', 'url')


def strip_ip_from_url(url):
    """
    Strip occurrences of an IP address from a URL.

    Two shapes are handled: a path segment pair such as ``/ip/1.2.3.4/``
    (the value is replaced by ``REDACTED``) and an ``ip`` query parameter
    such as ``?ip=1.2.3.4`` (the parameter is removed).

    :param url: The URL to clean. Non-string values (for example
                ``None``) are returned untouched.
    :return:    The redacted URL, or ``url`` itself when nothing had to be
                redacted.
    """
    if not isinstance(url, str):
        # urlparse() would fall back to bytes handling here and the str
        # regex below would then raise TypeError.
        return url

    parts = urlparse(url)
    redacted_path = _IP_PATH_RE.sub('/ip/REDACTED', parts.path)

    redacted_query = parts.query
    if parts.query:
        # parse_qsl keeps parameter order and repeated keys, and
        # keep_blank_values stops "a=" style parameters from being lost
        # when the query is re-encoded.
        pairs = parse_qsl(parts.query, keep_blank_values=True)
        kept = [(key, value) for key, value in pairs if key != 'ip']
        if len(kept) != len(pairs):
            redacted_query = urlencode(kept)

    if redacted_path == parts.path and redacted_query == parts.query:
        # Return the input verbatim: geturl() may normalise the URL (e.g.
        # drop a trailing "?"), which would be reported as a modification.
        return url

    return parts._replace(path=redacted_path, query=redacted_query).geturl()


def _redact_field(container, field):
    """
    Redact ``container[field]`` in place.

    :return: True when the field exists and its value was changed.
    """
    if field not in container:
        return False

    redacted_url = strip_ip_from_url(container[field])
    if redacted_url == container[field]:
        return False

    container[field] = redacted_url
    return True


def strip_ip_from_meta(meta):
    """
    Redact IP addresses from the URLs of a video metadata dictionary.

    The top-level ``url`` field and the ``manifest_url``,
    ``fragment_base_url`` and ``url`` fields of every entry in ``formats``
    are cleaned with :func:`strip_ip_from_url`. A missing or empty
    ``formats`` entry is treated as "no formats".

    NOTE(review): other keys of the metadata may carry URLs as well; only
    the fields listed above are inspected -- confirm this is sufficient.

    :param meta: Metadata dictionary; it is modified in place.
    :return:     A ``(modified, meta)`` tuple where ``modified`` tells
                 whether any field was changed and ``meta`` is the same
                 dictionary object that was passed in.
    """
    modified = _redact_field(meta, 'url')

    for _format in meta.get('formats') or []:
        for field in _FORMAT_URL_FIELDS:
            if _redact_field(_format, field):
                modified = True

    return modified, meta