-
Notifications
You must be signed in to change notification settings - Fork 2
/
jobCentre.py
65 lines (49 loc) · 2.14 KB
/
jobCentre.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import logging
import pprint
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
# Search-results URL template; "{}" is replaced with the search keyword.
jobLink = "https://www.jobcentrebrunei.gov.bn/web/guest/search-job?q={}"
class JobCentre:
    """Scraper for the job search results page addressed by ``jobLink``."""

    # Seconds to wait for the site before giving up; without a timeout
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def scrape(self, keyword):
        """Return the job postings listed for ``keyword``.

        Fetches the search results page through ``getSoup`` and builds one
        dictionary per ``div.jp_job_post_main_wrapper`` element.

        Args:
            keyword: Search term to look up.

        Returns:
            A list of dicts with the keys ``company``, ``title``, ``salary``,
            ``link`` and ``applyLink``. Postings whose markup lacks one of
            the expected elements are skipped (and logged) instead of
            aborting the whole scrape.
        """
        soup = self.getSoup(keyword)
        jobs = []
        for div in soup.find_all("div", class_="jp_job_post_main_wrapper"):
            job = self._parseJob(div)
            if job is not None:
                jobs.append(job)
        return jobs

    def _parseJob(self, div):
        """Build the job dictionary for one posting ``div``.

        Returns ``None`` when any expected element is missing.
        """
        try:
            # Look the content container up once; company, title and link
            # all live inside it.
            content = div.find_all("div", class_="jp_job_post_right_cont")[0]
            heading = content.find_all("h4")[0]
            company = content.find_all("p")[0].find_all("a")[0].text
            title = heading.find_all("a")[0].text
            link = heading.find_all("a", href=True)[0]["href"]
            salary = div.find_all("li")[0].text
        except IndexError:
            logging.getLogger(__name__).warning(
                "Skipping job posting with unexpected markup"
            )
            return None

        # NOTE(review): the second "?" in the apply link looks like it should
        # be "&" (and the redirect target percent-encoded) - confirm against
        # the live site before changing, since callers receive this string.
        return {
            'company': company,
            'title': title,
            'salary': "B$ {}".format(salary),
            'link': "http://www.jobcentrebrunei.gov.bn{}".format(link),
            'applyLink': "http://www.jobcentrebrunei.gov.bn/c/portal/login?p_l_id=95?redirectURL={}".format(link),
        }

    def getSoup(self, keyword):
        """Fetch the search results page for ``keyword`` and parse it.

        Args:
            keyword: Raw (not URL-encoded) search term.

        Returns:
            A ``BeautifulSoup`` object built from the response text with the
            ``html.parser`` backend.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not complete within ``REQUEST_TIMEOUT`` seconds.
        """
        # Percent-encode the keyword so characters such as "&", "#" or "+"
        # stay inside the q parameter instead of altering the URL.
        url = jobLink.format(quote_plus(str(keyword)))
        resp = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(resp.text, "html.parser")
# p = JobCentre()
# p.scrape('admin')