-
Notifications
You must be signed in to change notification settings - Fork 237
/
2mentest.py
35 lines (28 loc) · 1.2 KB
/
2mentest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import xml.etree.ElementTree as ET
import random
def _child_text(parent, path, namespaces):
    """Return the text of the first child of *parent* matching *path*, or None if absent."""
    node = parent.find(path, namespaces)
    return None if node is None else node.text


def extract_sitemap_data(xml_file_path, num_urls=200):
    """Return up to *num_urls* image-bearing entries from a sitemap XML file.

    Each ``<url>`` element that has an ``<image:image>`` child contributes one
    ``(page_url, image_url, image_title)`` tuple. Only the first
    ``<image:image>`` of each ``<url>`` is read.

    Args:
        xml_file_path: Path (or file object) handed to ``ElementTree.parse``.
        num_urls: Maximum number of entries to return. When more entries are
            found, a random sample of this size is returned; otherwise all
            entries are returned in document order.

    Returns:
        A list of ``(page_url, image_url, image_title)`` tuples.
        ``image_title`` is ``None`` when the image has no ``<image:title>``.

    Entries whose ``<loc>`` or ``<image:loc>`` is missing or empty are skipped
    rather than raising ``AttributeError``, so one malformed entry does not
    abort the whole extraction.
    """
    namespaces = {
        'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'image': 'http://www.google.com/schemas/sitemap-image/1.1',
    }
    root = ET.parse(xml_file_path).getroot()

    all_products = []
    for url in root.findall('ns:url', namespaces):
        image = url.find('image:image', namespaces)
        if image is None:
            continue  # only entries that carry an image are of interest

        loc = _child_text(url, 'ns:loc', namespaces)
        image_loc = _child_text(image, 'image:loc', namespaces)
        if loc is None or image_loc is None:
            continue  # unusable without both the page URL and the image URL

        # The title is optional; keep the entry and record None when absent.
        image_title = _child_text(image, 'image:title', namespaces)
        all_products.append((loc, image_loc, image_title))

    # Down-sample only when there are more entries than requested.
    if len(all_products) > num_urls:
        return random.sample(all_products, num_urls)
    return all_products
def main():
    """Print the sampled sitemap entries, one URL / image URL / title group each."""
    # Point this at the sitemap XML file to read.
    sitemap_path = 'sitemap_products_20.xml'
    for page_url, image_url, title in extract_sitemap_data(sitemap_path):
        print(f"URL: {page_url}\nImage URL: {image_url}\nTitle: {title}\n")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()