forked from KalleHallden/BlackFridayScrape
-
Notifications
You must be signed in to change notification settings - Fork 0
/
price_scraper.py
117 lines (100 loc) · 3.65 KB
/
price_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import requests
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from product import Product
URL = "http://www.amazon.com/"
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
driver = webdriver.Chrome("/Users/kalle/Downloads/chromedriver", chrome_options=options)
search_term = str(raw_input("What are you looking for?\n:"))
driver.get(URL)
element = driver.find_element_by_xpath('//*[@id="twotabsearchtextbox"]')
element.send_keys(search_term)
element.send_keys(Keys.ENTER)
products = []
def convert_price_toNumber(price):
price = price.split("$")[1]
try:
price = price.split("\n")[0] + "." + price.split("\n")[1]
except:
Exception()
try:
price = price.split(",")[0] + price.split(",")[1]
except:
Exception()
return float(price)
page = 1
while True:
if page != 1:
try:
driver.get(driver.current_url + "&page=" + str(page))
except:
break
for i in driver.find_elements_by_xpath('//*[@id="search"]/div[1]/div[2]/div/span[4]/div[1]'):
counter = 0
for element in i.find_elements_by_xpath('//div/div/div[2]/div[2]/div/div[2]/div[1]/div/div[1]/div/div/a'):
should_add = True
name = ""
price = ""
prev_price = ""
link = ""
try:
name = i.find_elements_by_tag_name('h2')[counter].text
price = convert_price_toNumber(element.find_element_by_class_name('a-price').text)
link = i.find_elements_by_xpath('//h2/a')[counter].get_attribute("href")
try:
prev_price = convert_price_toNumber(element.find_element_by_class_name('a-text-price').text)
except:
Exception()
prev_price = price
except:
print("exception")
should_add = False
product = Product(name, price, prev_price, link)
if should_add:
products.append(product)
counter = counter + 1
page = page +1
if page == 11:
break
print(page)
biggest_discount = 0.0
lowest_price = 0.0
chepest_product = Product("", "", "", "")
best_deal_product = Product("", "", "", "")
search_terms = search_term.split(" ")
run = 0
for product in products:
not_right = False
for word in search_terms:
if word.lower() not in product.name.lower():
not_right = True
if not not_right:
if run == 0:
lowest_price = product.price
chepest_product = product
run = 1
elif product.price < lowest_price:
lowest_price = product.price
chepest_product = product
discount = product.prev_price - product.price
if discount > biggest_discount:
biggest_discount = discount
best_deal_product = product
with open('products.json', 'w') as json_file:
data = {}
data["Products"] = []
for prod in products:
data["Products"].append(prod.serialize())
json.dump(data, json_file, sort_keys=True, indent=4)
print(json.dumps(chepest_product.serialize(), indent=4, sort_keys=True))
print(json.dumps(best_deal_product.serialize(), indent=4, sort_keys=True))
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
driver = webdriver.Chrome("/Users/kalle/Downloads/chromedriver", chrome_options=options)
driver.get(best_deal_product.link)
driver.find_element_by_tag_name('body').send_keys(Keys.COMMAND + 't')