
Commit

lint
whchien committed Jun 7, 2024
1 parent 7c2c5be commit 7cea11e
Showing 2 changed files with 37 additions and 12 deletions.
2 changes: 1 addition & 1 deletion funda_scraper/VERSION
@@ -1 +1 @@
1.1.1
1.2.0
47 changes: 36 additions & 11 deletions funda_scraper/scrape.py
@@ -1,11 +1,12 @@
"""Main funda scraper module"""
import argparse
from collections import OrderedDict
import datetime
import json
import multiprocessing as mp
import os
from collections import OrderedDict
from typing import List, Optional
from urllib.parse import urlparse, urlunparse

import pandas as pd
import requests
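Taken together with the stray urllib import removed in the next hunk, the reordering matches isort's default layout: straight import statements first, then from-imports, each alphabetized, with third-party packages in their own group. Reconstructed from these hunks, the new header of scrape.py presumably reads:

"""Main funda scraper module"""
import argparse
import datetime
import json
import multiprocessing as mp
import os
from collections import OrderedDict
from typing import List, Optional
from urllib.parse import urlparse, urlunparse

import pandas as pd
import requests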
@@ -17,7 +18,6 @@
from funda_scraper.preprocess import clean_date_format, preprocess_data
from funda_scraper.utils import logger

from urllib.parse import urlparse, urlunparse

class FundaScraper(object):
"""
@@ -104,10 +104,23 @@ def check_days_since(self) -> int:
@property
def check_sort(self) -> str:
"""Whether sort complies"""
if self.sort in [None, 'relevancy', 'date_down', 'date_up', 'price_up', 'price_down', 'floor_area_down', 'plot_area_down', 'city_up' 'postal_code_up']:
if self.sort in [
None,
"relevancy",
"date_down",
"date_up",
"price_up",
"price_down",
"floor_area_down",
"plot_area_down",
"city_up" "postal_code_up",
]:
return self.sort
else:
raise ValueError("'sort' must be either None, 'relevancy', 'date_down', 'date_up', 'price_up', 'price_down', 'floor_area_down', 'plot_area_down', 'city_up' or 'postal_code_up'.")
raise ValueError(
"'sort' must be either None, 'relevancy', 'date_down', 'date_up', 'price_up', 'price_down', "
"'floor_area_down', 'plot_area_down', 'city_up' or 'postal_code_up'. "
)
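One real bug lurked in the deleted one-liner above: 'city_up' 'postal_code_up' has no separating comma, and Python silently concatenates adjacent string literals, so the allowed-values list contained a single entry 'city_uppostal_code_up' and 'postal_code_up' could never validate. Hence the explicit comma in the reformatted list. A minimal demonstration of the pitfall:

# Adjacent string literals are implicitly concatenated:
options = ["city_up" "postal_code_up"]  # missing comma
print(options)                          # ['city_uppostal_code_up']
print("postal_code_up" in options)      # False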

@staticmethod
def _check_dir() -> None:
@@ -168,19 +181,21 @@ def reset(
self.sort = sort

def remove_duplicates(self, lst):
return list(OrderedDict.fromkeys(lst))
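As a quick aside, OrderedDict.fromkeys deduplicates while preserving first-seen order:

from collections import OrderedDict

links = ["/koop/a", "/koop/b", "/koop/a"]  # illustrative values
print(list(OrderedDict.fromkeys(links)))   # ['/koop/a', '/koop/b']

On Python 3.7+ a plain dict.fromkeys would behave identically, since dicts preserve insertion order.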

def fix_link(self, link:str) -> str:
def fix_link(self, link: str) -> str:
link_url = urlparse(link)
link_path = link_url.path.split("/")
property_id = link_path.pop(5)
property_address = link_path.pop(4).split("-")
link_path = link_path[2:4]
property_address.insert(1, property_id)
link_path.extend(["-".join(property_address), "?old_ldp=true"])

return urlunparse((link_url.scheme, link_url.netloc, "/".join(link_path),'','',''))


return urlunparse(
(link_url.scheme, link_url.netloc, "/".join(link_path), "", "", "")
)
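To make the rewrite concrete, here is a trace through fix_link with a hypothetical new-style listing URL (the exact funda path layout is an assumption here):

# Hypothetical input; assumes funda's new-style detail URLs look like this.
link = "https://www.funda.nl/detail/koop/amsterdam/huis-voorbeeldstraat-1/43210987/"
# link_url.path.split("/") ->
#   ['', 'detail', 'koop', 'amsterdam', 'huis-voorbeeldstraat-1', '43210987', '']
# pop(5) extracts the id, pop(4) the address slug, [2:4] keeps ['koop', 'amsterdam'],
# and the id is spliced back into the slug, producing the old-style URL:
#   https://www.funda.nl/koop/amsterdam/huis-43210987-voorbeeldstraat-1/?old_ldp=true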

def fetch_all_links(self, page_start: int = None, n_pages: int = None) -> None:
"""Find all the available links across multiple pages."""

@@ -430,7 +445,17 @@ def run(
type=str,
help="Specify sorting",
default=None,
choices=[None, 'relevancy', 'date_down', 'date_up', 'price_up', 'price_down', 'floor_area_down', 'plot_area_down', 'city_up' 'postal_code_up'],
choices=[
None,
"relevancy",
"date_down",
"date_up",
"price_up",
"price_down",
"floor_area_down",
"plot_area_down",
"city_up" "postal_code_up",
],
)
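One subtlety in this choices list: because the flag is declared with type=str, any value supplied on the command line arrives as a string, so the None entry is reachable only through the default (argparse does not validate defaults against choices). A quick check of that behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--sort", type=str, default=None, choices=[None, "relevancy"])
print(parser.parse_args([]).sort)                       # None
print(parser.parse_args(["--sort", "relevancy"]).sort)  # relevancy
# parser.parse_args(["--sort", "None"]) exits with: invalid choice: 'None'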
parser.add_argument(
"--raw_data",
