Commit

Added user input webdriver
joeyism committed Dec 19, 2017
1 parent f1871a9 commit 23b8e01
Showing 4 changed files with 43 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -2,3 +2,5 @@
 linkedin_user_scraper.egg-info
 dist
 build
+__pycache__
+*.pyc
31 changes: 30 additions & 1 deletion README.md
@@ -18,7 +18,36 @@ To use it, just create the class
from linkedin_user_scraper.scraper import Person
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")
```

## API
Overall, a Person object can be created with the following inputs:

```python
Person(linkedin_url = None, experiences = [], educations = [], driver = None)
```
#### `linkedin_url`
This is the LinkedIn URL of their profile.

#### `experiences`
These are their past experiences: a list of `linkedin_user_scraper.scraper.Experience` objects.

#### `educations`
These are their past educations: a list of `linkedin_user_scraper.scraper.Education` objects.
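
Both lists live on the `Person` object, so they can be read back once a profile has been scraped. A minimal sketch:

```python
from linkedin_user_scraper.scraper import Person

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")

# experiences and educations are plain lists populated by the scrape
for experience in person.experiences:
    print(experience)

for education in person.educations:
    print(education)
```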

#### `driver`
This is the Selenium driver with which to scrape the LinkedIn profile. A driver using Chrome is created by default; however, if a driver is passed in, that one is used instead.

For example:
```python
from selenium import webdriver

driver = webdriver.Chrome()
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver)
```
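
The same mechanism works for a driver configured with custom options. A minimal sketch, assuming Selenium's standard `ChromeOptions` API (none of the option handling below is specific to this library):

```python
from selenium import webdriver

# Configure Chrome however the scrape should run, e.g. headless
options = webdriver.ChromeOptions()
options.add_argument("--headless")

driver = webdriver.Chrome(chrome_options=options)
person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver)
```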


## Versions
**1.1.x**
* Adds an additional API where users can pass in their own webdriver

**1.0.x**
* First publish and fixes


13 changes: 9 additions & 4 deletions linkedin_user_scraper/scraper.py
@@ -14,26 +14,31 @@ class Person(object):
     also_viewed_urls = []
     linkedin_url = None
 
-    def __init__(self, linkedin_url = None, experiences = [], educations = []):
+    def __init__(self, linkedin_url = None, experiences = [], educations = [], driver = None):
         self.linkedin_url = linkedin_url
         self.experiences = experiences
         self.educations = educations
 
         if self.linkedin_url != None:
-            self.__scrape_linkedin__()
+            self.__scrape_linkedin__(driver)
 
     def add_experience(self, experience):
         self.experiences.append(experience)
 
     def add_education(self, education):
         self.educations.append(education)
 
-    def __scrape_linkedin__(self):
+    def __scrape_linkedin__(self, driver = None):
         if os.getenv("CHROMEDRIVER") == None:
             driver_path = os.path.join(os.path.dirname(__file__), 'drivers/chromedriver')
         else:
             driver_path = os.getenv("CHROMEDRIVER")
-        driver = webdriver.Chrome(driver_path)
+
+        if driver is None:
+            try:
+                driver = webdriver.Chrome(driver_path)
+            except:
+                driver = webdriver.Chrome()
         page = driver.get(self.linkedin_url)
 
         # get name
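
With this change, when no driver is passed in, the scraper uses the `CHROMEDRIVER` environment variable as the chromedriver path if it is set (otherwise the bundled `drivers/chromedriver`), and falls back to a plain `webdriver.Chrome()` if that fails. A minimal sketch of pointing it at a specific binary (the path below is an assumed example):

```python
import os
from linkedin_user_scraper.scraper import Person

# Read only when no driver is passed to Person(); if set, this path is
# preferred over the bundled drivers/chromedriver.
os.environ["CHROMEDRIVER"] = "/usr/local/bin/chromedriver"  # assumed example path

person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")
```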
4 changes: 2 additions & 2 deletions setup.py
@@ -12,12 +12,12 @@
 setup(
     name = 'linkedin_user_scraper',
     packages = ['linkedin_user_scraper'], # this must be the same as the name above
-    version = '0.0.8',
+    version = '1.1.0',
     description = 'Scrapes user data from Linkedin',
     author = 'Joey Sham',
     author_email = '[email protected]',
     url = 'https://github.com/joeyism/linkedin_user_scraper', # use the URL to the github repo
-    download_url = 'https://github.com/joeyism/linkedin_user_scraper/dist/0.0.8.tar.gz',
+    download_url = 'https://github.com/joeyism/linkedin_user_scraper/dist/1.1.0.tar.gz',
     keywords = ['linkedin', 'scraping', 'scraper'],
     classifiers = [],
     install_requires=['lxml', 'request', 'selenium'],
