-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAmazon Web Scraper Project
137 lines (86 loc) · 4.15 KB
/
Amazon Web Scraper Project
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# import libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib
# Connect to Website and pull in data
URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'

# Browser-like request headers sent along with the GET request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# requests waits indefinitely unless a timeout is given, so cap the wait, and
# fail loudly on an HTTP error status instead of parsing an error page.
page = requests.get(URL, headers=headers, timeout=30)
page.raise_for_status()

# Parse the response, pretty-print it, then parse the pretty-printed markup again.
soup1 = BeautifulSoup(page.content, "html.parser")
soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

# find() returns None when no element carries the requested id; report that
# explicitly rather than failing on None.get_text().
title_tag = soup2.find(id='productTitle')
price_tag = soup2.find(id='priceblock_ourprice')
if title_tag is None or price_tag is None:
    raise ValueError("productTitle or priceblock_ourprice element not found in the page")

title = title_tag.get_text()
price = price_tag.get_text()

print(title)
print(price)
# Output:
# Funny Got Data MIS Data Systems Business Analyst T-Shirt
# $16.99
# Tidy the scraped strings: trim surrounding whitespace, then drop the first
# character of the price (the "$" in "$16.99")
title = title.strip()
price = price.strip()
price = price[1:]
print(title, price, sep="\n")
# Output:
# Funny Got Data MIS Data Systems Business Analyst T-Shirt
# 16.99
# Record the collection date so every row can be traced to the day it was scraped
import datetime

today = datetime.datetime.now().date()
print(today)
# Output:
# 2021-08-21
# Build the dataset file from scratch: one header row followed by the first scraped row
import csv

header = ['Title', 'Price', 'Date']
data = [title, price, today]

# Mode 'w' truncates any existing file; newline='' leaves line endings to the csv module
with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerows([header, data])
# Read the CSV back to check what was written. Use the same relative path the
# writer uses so the check works from any working directory, not only when the
# script happens to run from C:\Users\alexf.
import pandas as pd

df = pd.read_csv('AmazonWebScraperDataset.csv')
print(df)
# Append one more row to the existing CSV ('a+' adds to the end instead of truncating)
with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)
# Combine all of the above code into one function
def check_price():
    """Scrape the product page once and append a (title, price, date) row to the CSV.

    Fetches the hard-coded Amazon product URL, reads the elements with ids
    ``productTitle`` and ``priceblock_ourprice``, strips whitespace from both,
    drops the first character of the price (the currency symbol in "$16.99"),
    and appends the row to ``AmazonWebScraperDataset.csv``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        ValueError: if the title or price element is missing from the page.
    """
    import csv
    import datetime

    URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # requests waits indefinitely unless a timeout is given; a stalled
    # connection would otherwise freeze the daily polling loop.
    page = requests.get(URL, headers=headers, timeout=30)
    # Surface HTTP errors directly instead of parsing an error page.
    page.raise_for_status()

    # Parse the response, pretty-print it, then parse the pretty-printed markup again.
    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    # find() returns None when no element carries the requested id; report that
    # explicitly rather than failing on None.get_text().
    title_tag = soup2.find(id='productTitle')
    price_tag = soup2.find(id='priceblock_ourprice')
    if title_tag is None or price_tag is None:
        raise ValueError("productTitle or priceblock_ourprice element not found in the page")

    title = title_tag.get_text().strip()
    # [1:] removes the leading currency symbol, e.g. "$16.99" -> "16.99".
    price = price_tag.get_text().strip()[1:]
    today = datetime.date.today()

    data = [title, price, today]

    # 'a+' appends to the existing dataset; no header row is written here.
    with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        writer.writerow(data)
# Poll forever: record the price, then wait a full day (24 * 60 * 60 = 86400 seconds).
# Nothing below this loop runs until it is interrupted, and an exception raised by
# check_price() ends the loop.
while True:
    check_price()
    time.sleep(24 * 60 * 60)
# Read the accumulated dataset back. Use the same relative path check_price()
# appends to so this works from any working directory, not only when the
# script happens to run from C:\Users\alexf.
import pandas as pd

df = pd.read_csv('AmazonWebScraperDataset.csv')
print(df)
# If you want to try sending yourself an email (just for fun) when the price drops below a certain level,
# you can try it out with this script
def send_mail():
    """Email a price-drop alert through Gmail's SMTP-over-SSL endpoint.

    Logs in to ``smtp.gmail.com`` on port 465 and sends a fixed subject and
    body from the account to itself.

    Raises:
        smtplib.SMTPException: if login or delivery fails.
        OSError: if the connection to the server cannot be established.
    """
    # NOTE(review): the address below looks like a redacted placeholder and the
    # password is hard-coded; substitute real credentials (ideally read from
    # the environment) before running.
    address = '[email protected]'

    subject = "The Shirt you want is below $15! Now is your chance to buy!"
    body = "Alex, This is the moment we have been waiting for. Now is your chance to pick up the shirt of your dreams. Don't mess it up! Link here: https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data+analyst+tshirt&qid=1626655184&sr=8-3"

    msg = f"Subject: {subject}\n\n{body}"

    # The context manager sends QUIT and closes the connection even when login
    # or delivery raises.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.ehlo()
        server.login(address, 'xxxxxxxxxxxxxx')
        # sendmail() requires the sender, the recipient(s) and the message;
        # calling it with the message alone raises TypeError.
        server.sendmail(address, address, msg)