-
Notifications
You must be signed in to change notification settings - Fork 0
/
selenium_download.py
49 lines (40 loc) · 1.61 KB
/
selenium_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import sys
# The wget module
import wget
# The BeautifulSoup module
from bs4 import BeautifulSoup
# The selenium module
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from urllib import request
from selenium.common.exceptions import TimeoutException
from urllib.error import URLError

# Captcha scraper: repeatedly loads the IRCTC login page, waits for the captcha
# image to become visible and saves it as a zero-padded, numbered PNG.

# Alternative entry page: https://www.irctc.co.in/nget/train-search
LOGIN_URL = "https://www.irctc.co.in/eticketing/loginHome.jsf"
# id attribute of the <img> element that holds the captcha.
CAPTCHA_ID = "captchaImg"
# Directory the numbered PNG files are written to.
OUTPUT_DIR = 'irct_nlpCatcha'
# Seconds to wait for the captcha image to become visible.
WAIT_SECONDS = 5
# Attempts per image index before it is skipped, so that a permanently
# unreachable site cannot stall the run forever.
MAX_ATTEMPTS = 3

# Image indices to download: start inclusive, end exclusive.
start = 1
end = 10000


def _new_driver():
    """Start a fresh Firefox session using the local geckodriver binary."""
    return webdriver.Firefox(executable_path='geckodriver.exe')


# urlretrieve does not create missing directories, so make sure it exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Exactly one browser is alive at a time; the previous version launched two
# up front and never closed the first one.
driver = _new_driver()
try:
    for x in range(start, end):
        # Reassigning `start` cannot rewind an already-built range(), so a
        # failed index is retried explicitly here instead of being skipped.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                driver.get(LOGIN_URL)
                # until() returns the located element once it is visible, so
                # no second lookup of the same id is needed.
                captcha = WebDriverWait(driver, WAIT_SECONDS).until(
                    EC.visibility_of_element_located((By.ID, CAPTCHA_ID))
                )
                imgLink = captcha.get_attribute("src")
                print(x)
                target = os.path.join(OUTPUT_DIR, str(x).zfill(4) + '.png')
                request.urlretrieve(imgLink, target)
                break
            except TimeoutException:
                print("time out error")
            except URLError:
                print("url error")
            # Only reached after a failure: restart the browser before the
            # next attempt. quit() ends the whole session (browser and
            # geckodriver), whereas close() would only close the window.
            driver.quit()
            driver = _new_driver()
        else:
            print("giving up on image " + str(x) + " after "
                  + str(MAX_ATTEMPTS) + " attempts")
finally:
    # Always release the browser, including on Ctrl-C or unexpected errors.
    driver.quit()