Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Google Maps filter (with passing linter and type checker) #477

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Satisfy the linter
codders committed Sep 19, 2023
commit e0f45f39c92075e022b2bbfc4ee11f0aa3abd2f6
4 changes: 2 additions & 2 deletions flathunter/crawler/vrmimmo.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
import re
import hashlib

from bs4 import BeautifulSoup, Tag
from bs4 import BeautifulSoup

from flathunter.logging import logger
from flathunter.abstract_crawler import Crawler
@@ -29,7 +29,7 @@ def extract_data(self, soup: BeautifulSoup):
link = item.find("a", {"class": "js-item-title-link ci-search-result__link"})
url = link.get("href")
title = link.get("title")
logger.debug("Analyze " + url)
logger.debug("Analyze %s", url)

try:
price = item.find("div", {"class": "item__spec item-spec-price"}).text
10 changes: 7 additions & 3 deletions flathunter/dataclasses.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
This module contains dataclasses to help with serialisation and typechecking of data
sent to and received from the Google Maps Distance API
"""
from dataclasses import dataclass
from enum import Enum
from typing import Optional
@@ -58,6 +62,6 @@ class FilterChainName(Enum):
Maps API, to make a decision on this expose.

We separate the filter chains to avoid making expensive (literally!) calls to the
Google Maps API for exposes that we already know we aren't interested in anyway."""
preprocess = 'PREPROCESS'
postprocess = 'POSTPROCESS'
Google Maps API for exposes that we already know we aren't interested in anyway."""
PREPROCESS = 'PREPROCESS'
POSTPROCESS = 'POSTPROCESS'
19 changes: 9 additions & 10 deletions flathunter/filter.py
Original file line number Diff line number Diff line change
@@ -181,12 +181,12 @@ class DistanceFilter(AbstractFilter):

This must be in the post-processing filter chain, as it requires data
from the Google Maps API, which is not available right after scraping."""

distance_config: DistanceConfig

def __init__(self, distance_config: DistanceConfig):
self.distance_config = distance_config

def is_interesting(self, expose):
durations: Dict[str, DistanceElement] = expose.get('durations_unformatted', None)
if durations is None or self.distance_config.location_name not in durations:
@@ -211,7 +211,7 @@ def __init__(self):

def _append_filter_if_not_empty(
self,
filter_class: ABCMeta,
filter_class: ABCMeta,
filter_config: Any):
"""Appends a filter to the list if its configuration is set"""
if not filter_config:
@@ -220,7 +220,7 @@ def _append_filter_if_not_empty(

def read_config(self, config, filter_chain: FilterChainName):
"""Adds filters from a config dictionary"""
if filter_chain == FilterChainName.preprocess:
if filter_chain == FilterChainName.PREPROCESS:
self._append_filter_if_not_empty(TitleFilter, config.excluded_titles())
self._append_filter_if_not_empty(MinPriceFilter, config.min_price())
self._append_filter_if_not_empty(MaxPriceFilter, config.max_price())
@@ -230,9 +230,9 @@ def read_config(self, config, filter_chain: FilterChainName):
self._append_filter_if_not_empty(MaxRoomsFilter, config.max_rooms())
self._append_filter_if_not_empty(
PPSFilter, config.max_price_per_square())
elif filter_chain == FilterChainName.postprocess:
for df in config.max_distance():
self._append_filter_if_not_empty(DistanceFilter, df)
elif filter_chain == FilterChainName.POSTPROCESS:
for distance_filter in config.max_distance():
self._append_filter_if_not_empty(DistanceFilter, distance_filter)
else:
raise NotImplementedError()
return self
@@ -261,7 +261,7 @@ def is_interesting_expose(self, expose):
if not filter_.is_interesting(expose):
return False
return True

def filter(self, exposes):
"""Apply all filters to every expose in the list"""
return filter(self.is_interesting_expose, exposes)
@@ -270,4 +270,3 @@ def filter(self, exposes):
def builder():
"""Return a new filter chain builder"""
return FilterChainBuilder()

8 changes: 6 additions & 2 deletions flathunter/gmaps_duration_processor.py
Original file line number Diff line number Diff line change
@@ -4,8 +4,11 @@
from urllib.parse import quote_plus
from typing import Dict
import requests
from flathunter.dataclasses import DistanceElement, DistanceValueTuple, DurationValueTuple, TransportationModes

from flathunter.dataclasses import (DistanceElement,
DistanceValueTuple,
DurationValueTuple,
TransportationModes)
from flathunter.logging import logger
from flathunter.abstract_processor import Processor

@@ -47,7 +50,8 @@ def get_formatted_durations(self, address):
def _format_durations(self, durations: Dict[str, DistanceElement]):
out = ""
for location_name, val in durations.items():
out += f"> {location_name} ({val.mode.value}): {val.duration.text} ({val.distance.text})\n"
out += f"> {location_name} ({val.mode.value}): " + \
f"{val.duration.text} ({val.distance.text})\n"
return out.strip()

def _get_gmaps_distance(self, address, dest, mode) -> DistanceElement | None:
8 changes: 4 additions & 4 deletions flathunter/hunter.py
Original file line number Diff line number Diff line change
@@ -57,14 +57,14 @@ def hunt_flats(self, max_pages=None):
result.append(expose)

return result

def _build_preprocess_filter_chain(self, config) -> FilterChain:
return FilterChain.builder() \
.read_config(config, FilterChainName.preprocess) \
.read_config(config, FilterChainName.PREPROCESS) \
.filter_already_seen(self.id_watch) \
.build()

def _build_postprocess_filter_chain(self, config) -> FilterChain:
return FilterChain.builder() \
.read_config(config, FilterChainName.postprocess) \
.read_config(config, FilterChainName.POSTPROCESS) \
.build()
6 changes: 4 additions & 2 deletions flathunter/web/views.py
Original file line number Diff line number Diff line change
@@ -74,8 +74,10 @@ def filter_for_user():
if filter_values_for_user() is None:
return None
return (FilterChainBuilder()
.read_config(YamlConfig({'filters': filter_values_for_user()}), FilterChainName.preprocess)
.read_config(YamlConfig({'filters': filter_values_for_user()}), FilterChainName.postprocess)
.read_config(
YamlConfig({'filters': filter_values_for_user()}), FilterChainName.PREPROCESS)
.read_config(
YamlConfig({'filters': filter_values_for_user()}), FilterChainName.POSTPROCESS)
.build())

def form_filter_values():
2 changes: 1 addition & 1 deletion flathunter/web_hunter.py
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ def hunt_flats(self, max_pages=1):
"""Crawl all URLs, and send notifications to users of new flats"""
preprocess_filter_chain = self._build_preprocess_filter_chain(self.config)
postprocess_filter_chain = self._build_postprocess_filter_chain(self.config)
# note: we have to save all exposes *after* applying the processors because
# note: we have to save all exposes *after* applying the processors because
# the exposes later get loaded from disk to then be filtered again, so we need
# the additional information from the processor lest the postprocess chain breaks
# due to missing data