-
Notifications
You must be signed in to change notification settings - Fork 3
/
wordpress_image_downloader.py
53 lines (43 loc) · 2.13 KB
/
wordpress_image_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import re
import requests
import shutil
from urllib.parse import urlparse, urlunparse, parse_qs
def sanitize_filename(filename):
    """Return *filename* made safe for use as a local file name.

    Every character that is not an ASCII letter, a digit, an underscore or
    a dot is replaced with a single underscore; the length of the string is
    therefore unchanged.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9_.]')
    return disallowed.sub('_', filename)
def strip_query_params(url):
    """Return *url* with its query string removed.

    All other components (scheme, host, path, path parameters and fragment)
    are kept as parsed.
    """
    scheme, netloc, path, params, _query, fragment = urlparse(url)
    return urlunparse((scheme, netloc, path, params, '', fragment))
def download_image_if_not_exists(full_img_url, headers, auth, env, writer, timeout=30):
    """Download an image into ``./dist/assets`` unless it is already there.

    The local file name is the last path segment of the URL (query string
    removed) passed through ``sanitize_filename``.

    Args:
        full_img_url: Image URL. ``//host/path`` and scheme-less URLs are
            given an ``https`` scheme before the request is made.
        headers: Passed to ``requests.get`` as ``headers``.
        auth: Passed to ``requests.get`` as ``auth``.
        env: Forwarded unchanged to ``writer.write_summary_and_fail_on_prod``.
        writer: Object providing ``write_summary(msg)`` and
            ``write_summary_and_fail_on_prod(msg, env)``; used for all
            progress and failure reporting.
        timeout: Seconds to wait for the server before the request is
            abandoned (forwarded to ``requests.get``).

    Returns:
        The local path for the image. The path is returned even when the
        download failed, in which case no file exists at that path.
    """
    # Ensure the URL has a protocol
    if full_img_url.startswith("//"):
        full_img_url = "https:" + full_img_url
    elif not full_img_url.startswith(("http://", "https://")):
        full_img_url = "https://" + full_img_url

    # Strip query parameters for filename sanitization only; the request
    # below still uses the full URL.
    url_without_query = strip_query_params(full_img_url)

    # Determine the local file path
    local_dir = './dist/assets'
    os.makedirs(local_dir, exist_ok=True)
    sanitized_filename = sanitize_filename(os.path.basename(url_without_query))
    sanitized_filepath = os.path.join(local_dir, sanitized_filename)
    # NOTE(review): a URL whose path ends in "/" yields an empty file name,
    # so the path is the assets directory itself and is reported as
    # "already exists" -- confirm whether callers can pass such URLs.

    if os.path.exists(sanitized_filepath):
        writer.write_summary(f"- Image already exists: {sanitized_filepath}\n")
        return sanitized_filepath

    # Download to a temporary sibling file and rename it only once complete,
    # so an interrupted transfer never leaves a truncated image that a later
    # run would mistake for a finished download.
    partial_filepath = sanitized_filepath + ".part"
    try:
        # The context manager releases the streamed connection on every
        # path, including the early return below.
        with requests.get(full_img_url, stream=True, headers=headers,
                          auth=auth, timeout=timeout) as img_data:
            if img_data.status_code != 200:
                writer.write_summary_and_fail_on_prod(f"- 🚨 Failed to download image: {full_img_url}, status code: {img_data.status_code}\n", env)
                return sanitized_filepath
            # Let urllib3 undo gzip/deflate transfer encoding while copying.
            img_data.raw.decode_content = True
            with open(partial_filepath, 'wb') as file:
                shutil.copyfileobj(img_data.raw, file)
        os.replace(partial_filepath, sanitized_filepath)
        writer.write_summary(f"- Successfully downloaded image: {full_img_url} \n")
    except requests.exceptions.MissingSchema:
        writer.write_summary_and_fail_on_prod(f"- Please use a full URL for {full_img_url}. \n", env)
    except Exception as e:
        # Reporting boundary: any failure is handed to the writer, which
        # decides whether it is fatal for the given env.
        writer.write_summary_and_fail_on_prod(f"Error: {str(e)}\n", env)
    finally:
        # Best-effort cleanup of a partial download; a failure to delete it
        # must not mask the original error.
        if os.path.exists(partial_filepath):
            try:
                os.remove(partial_filepath)
            except OSError:
                pass
    return sanitized_filepath