# wordpress-cloning-img-script.py
import os
import shutil
import sys
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
from requests.auth import HTTPBasicAuth
# Upper bound (seconds) on each image request so a stalled server cannot hang the run.
_REQUEST_TIMEOUT_SECONDS = 30


def _local_filename(url):
    """Return the file-name component of *url*, ignoring any query string or fragment."""
    return os.path.basename(urlsplit(url).path)


def _download_image(url, destination, headers, auth):
    """Stream *url* into the file *destination*.

    Returns True when the file was written, False when the server answered
    with a non-2xx status (a warning is printed to stderr and nothing is
    written). Network-level errors raised by ``requests`` propagate.
    """
    # The context manager releases the streamed connection back to the pool.
    with requests.get(url, stream=True, headers=headers, auth=auth,
                      timeout=_REQUEST_TIMEOUT_SECONDS) as response:
        if not response.ok:
            # Do not save an HTML/JSON error body under an image file name.
            print(f"Skipping {url}: HTTP {response.status_code}", file=sys.stderr)
            return False
        # Let urllib3 undo gzip/deflate so the raw bytes on disk are the image itself.
        response.raw.decode_content = True
        with open(destination, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
    return True


def _rewrite_srcset(srcset, mirror):
    """Return *srcset* with every candidate URL passed through *mirror*.

    Candidates are comma-separated; each is a URL optionally followed by a
    width/density descriptor. Empty candidates (e.g. from a trailing comma)
    are dropped and a missing descriptor is tolerated.
    """
    candidates = []
    for candidate in srcset.split(','):
        parts = candidate.split()
        if not parts:
            continue
        url, descriptor = parts[0], ' '.join(parts[1:])
        candidates.append(f"{mirror(url)} {descriptor}".rstrip())
    return ', '.join(candidates)


def download_and_update_html(environment, wordpress_staging_username, wordpress_staging_password, folder_path='./dist/', source_base_url=None):
    """Download every <img> referenced by dist/index.html and repoint it at the CDN.

    Each image found in a ``src`` or ``srcset`` attribute is saved into
    *folder_path* under its URL base name, and the attribute is rewritten to
    ``<cdn base>/<base name>``. The modified document is written back to
    ``dist/index.html``.

    Args:
        environment: ``"PROD"`` selects the production CDN base URL and sends
            no credentials; any other value selects the staging CDN base URL
            and sends HTTP basic auth with every image request.
        wordpress_staging_username: Basic-auth user name (non-PROD only).
        wordpress_staging_password: Basic-auth password (non-PROD only).
        folder_path: Directory the images are saved into; created if missing.
        source_base_url: Optional base used to resolve relative image URLs.
            When None (the default) URLs are requested exactly as written in
            the HTML.

    Raises:
        requests.RequestException: On network-level failures (including
            timeouts and URLs that cannot be requested).
        OSError: If the HTML file or an image file cannot be read/written.

    Note:
        Images are flattened into one directory by base name, so two source
        URLs with the same file name overwrite each other. Images whose
        download fails with a non-2xx status keep their original URL.
    """
    headers = {
        'Accept': 'application/json',
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    # Setup authentication if not in production environment
    # NOTE(review): the credentials are attached to every image request
    # regardless of host — confirm all image URLs point at the staging site.
    if environment != "PROD":
        auth = HTTPBasicAuth(wordpress_staging_username, wordpress_staging_password)
        base_path = 'https://d3055hwma3riwo.cloudfront.net/'
    else:
        auth = None
        base_path = 'https://update-me.cloudfront.net/'

    # Create directory for saving images if it does not exist
    os.makedirs(folder_path, exist_ok=True)

    # Open and read the local HTML file
    html_path = "dist/index.html"
    with open(html_path, "r", encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')

    # Original URL -> rewritten URL; avoids re-downloading an image that is
    # referenced from both src and srcset (or from several tags).
    mirrored = {}

    def mirror(url):
        """Download *url* once and return its CDN URL, or *url* itself if it cannot be mirrored."""
        if url not in mirrored:
            mirrored[url] = url  # fallback: leave the reference untouched
            source_url = urljoin(source_base_url, url) if source_base_url else url
            filename = _local_filename(source_url)
            # An empty name (URL ending in "/") cannot be saved as a file.
            if filename and _download_image(
                    source_url, os.path.join(folder_path, filename), headers, auth):
                # Plain concatenation, not os.path.join: this is a URL, so the
                # separator must be "/" on every operating system.
                mirrored[url] = f"{base_path.rstrip('/')}/{filename}"
        return mirrored[url]

    # Loop through all images, download them, and update their src and srcset attributes
    for img in soup.find_all('img'):
        src = img.get('src')
        if src and not src.strip().startswith('data:'):
            img['src'] = mirror(src.strip())

        srcset = img.get('srcset')
        if srcset and not srcset.strip().startswith('data:'):
            img['srcset'] = _rewrite_srcset(srcset, mirror)

    # Write the updated HTML back to the same file
    with open(html_path, 'w', encoding="utf-8") as html_file:
        html_file.write(str(soup))
# Script entry point: expects <environment> <username> <password> on the command line.
if __name__ == "__main__":
    # Echo the invocation for debugging, but mask everything after the
    # environment so the WordPress credentials never reach stdout / CI logs.
    print(sys.argv[:2] + ["***"] * len(sys.argv[2:]))
    if len(sys.argv) != 4:
        print("Usage: python wordpress-cloning-script.py <environment> <wordpress_staging_username> <wordpress_staging_password>")
        sys.exit(1)
    env = sys.argv[1]
    wordpress_staging_username = sys.argv[2]
    wordpress_staging_password = sys.argv[3]
    download_and_update_html(env, wordpress_staging_username, wordpress_staging_password)