-
Notifications
You must be signed in to change notification settings - Fork 3
/
wordpress-cloning-img-in-html-script.py
128 lines (109 loc) · 4.85 KB
/
wordpress-cloning-img-in-html-script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import sys
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests
import shutil
from requests.auth import HTTPBasicAuth
from github_writer import GitHubWriter
from wordpress_image_downloader import download_image_if_not_exists
def extract_urls_from_style(style):
    """Return the targets of every ``url(...)`` reference in a CSS string.

    The string is scanned left to right for the literal ``url(``; the text up
    to the next ``)`` is taken as the target, with surrounding whitespace and
    one layer of single/double quotes removed.

    Args:
        style: CSS text (an inline ``style`` attribute or the content of a
            ``<style>`` tag). ``None`` or an empty string yields ``[]``.

    Returns:
        A list of URL strings in order of appearance. Duplicates are kept.
        Empty targets (``url()``) and an unterminated trailing ``url(`` are
        skipped.
    """
    urls = []
    if not style:
        return urls

    start = style.find('url(')
    while start != -1:
        start += 4  # Skip 'url('
        end = style.find(')', start)
        if end == -1:
            # Unterminated 'url(' — slicing with -1 would silently return a
            # truncated, bogus URL, so stop scanning instead.
            break
        # Strip whitespace first so the quotes are actually at the ends.
        url = style[start:end].strip().strip('\'"')
        if url:
            # An empty string is useless to callers and dangerous with
            # str.replace (it matches between every character).
            urls.append(url)
        start = style.find('url(', end)
    return urls
def download_and_update_html(environment, wordpress_staging_username, wordpress_staging_password):
    """Rewrite image references in the built HTML files to local asset paths.

    For each existing file in a fixed list of ``dist/index*.html`` pages, the
    function passes every image URL it finds (``<img src>``, ``<img srcset>``,
    inline ``style`` attributes and ``<style>`` tags) to
    ``download_image_if_not_exists`` and replaces the reference with
    ``./assets/<basename of the returned path>``. The modified document is
    written back over the original file.

    Args:
        environment: Deployment environment name. Any value other than
            ``"PROD"`` enables HTTP basic authentication.
        wordpress_staging_username: Basic-auth user name (non-PROD only).
        wordpress_staging_password: Basic-auth password (non-PROD only).

    Raises:
        Whatever ``download_image_if_not_exists``, BeautifulSoup or the file
        operations raise; nothing is caught here.
    """
    writer = GitHubWriter()
    headers = {
        'Accept': 'application/json',
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    folder_path = './dist/assets'
    base_path = './assets'

    # Setup authentication if not in production environment
    auth = None
    if environment != "PROD":
        auth = HTTPBasicAuth(wordpress_staging_username, wordpress_staging_password)

    # Create directory for saving images if it does not exist
    os.makedirs(folder_path, exist_ok=True)

    # List of HTML files to be updated
    html_files = [
        'dist/index.html',
        'dist/index-ar.html',
        'dist/index-cs.html',
        'dist/index-de.html',
        'dist/index-es.html',
        'dist/index-fr.html',
        'dist/index-it.html',
        'dist/index-ja.html',
        'dist/index-ko.html',
        'dist/index-pl.html',
        'dist/index-pt.html',
        'dist/index-ru.html',
        'dist/index-tr.html',
        'dist/index-zh-CN.html',
        'dist/index-zh-TW.html'
    ]

    def localize(url):
        """Fetch *url* via the downloader and return its path under base_path."""
        # NOTE(review): presumably returns the local path of the saved file;
        # only its basename is used here — confirm against the helper.
        sanitized_filepath = download_image_if_not_exists(url, headers, auth, environment, writer)
        return os.path.join(base_path, os.path.basename(sanitized_filepath))

    def rewrite_css(css):
        """Return *css* with every url(...) target replaced by its local path."""
        # str.replace rewrites all occurrences at once, so each distinct URL
        # is handled a single time. Processing a repeated relative URL twice
        # would match inside the already-rewritten path and corrupt it.
        for url in dict.fromkeys(extract_urls_from_style(css)):
            # Skip empty targets (replace('') would hit every position) and
            # inline data, mirroring the <img src> handling below.
            if not url or url.startswith('data:image'):
                continue
            css = css.replace(url, localize(url))
        return css

    for html_file in html_files:
        if not os.path.exists(html_file):
            writer.write_summary(f"- {html_file} does not exist, skipping...\n")
            continue

        with open(html_file, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, 'html.parser')

        images = soup.find_all('img')
        elements_with_style = soup.find_all(style=True)
        header_styles = soup.find_all('style')

        for img in images:
            img_url = img.get('src')
            if img_url and (not img_url.startswith('data:image')):
                img['src'] = localize(img_url)

            img_srcset = img.get('srcset')
            if img_srcset and (not img_srcset.startswith('data:image')):
                new_srcset = []
                for item in img_srcset.split(','):
                    # A candidate is "<url> [descriptor]"; the descriptor is
                    # optional and separated by arbitrary whitespace.
                    parts = item.strip().split(None, 1)
                    if not parts:
                        continue  # empty candidate, e.g. a trailing comma
                    local_path = localize(parts[0])
                    if len(parts) == 2:
                        new_srcset.append(f"{local_path} {parts[1]}")
                    else:
                        new_srcset.append(local_path)
                img['srcset'] = ', '.join(new_srcset)

        for element in elements_with_style:
            style = element.get('style')
            if style and 'url(' in style:
                element['style'] = rewrite_css(style)

        for style_tag in header_styles:
            # .string is None when the tag does not hold a single text node.
            style_content = style_tag.string
            if style_content and 'url(' in style_content:
                style_tag.string.replace_with(rewrite_css(style_content))

        with open(html_file, 'w', encoding="utf-8") as file:
            file.write(str(soup))
if __name__ == "__main__":
    # Command-line entry point. Expects exactly three positional arguments
    # (environment, staging user name, staging password), runs the HTML
    # rewrite and reports the outcome through GitHubWriter: the
    # "script-success" output is "true" on success; on any exception the
    # error is written to the summary, the output is "false" and the
    # process exits with status 1.
    writer = GitHubWriter()
    try:
        cli_args = sys.argv[1:]
        if len(cli_args) != 3:
            raise ValueError("Usage: python wordpress-cloning-script.py <environment> <wordpress_staging_username> <wordpress_staging_password>")

        env, wordpress_staging_username, wordpress_staging_password = cli_args
        download_and_update_html(
            env,
            wordpress_staging_username,
            wordpress_staging_password,
        )
    except Exception as e:
        writer.write_summary(f"Error: {e!s}\n")
        writer.write_output("script-success", "false")
        sys.exit(1)
    else:
        # Reached only when the rewrite finished without raising.
        writer.write_output("script-success", "true")