Last active
February 26, 2021 16:38
-
-
Save mmowbray/cce91258fd6ca6e910cfb01f6c6b0720 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
>centris-property-checker.py<
@Author: Maxwell Mowbray
@Email: mmowbray@mentlegen.com
@Date: February 2021
@Description:
This script will periodically check a Centris personal link for new properties.
When a new property appears, the user is notified with a popup.
It can easily be adapted to do other things with the results.
A custom filter can also be defined, to only show popups of properties with certain parameters.
Usage:
1- Download the Chrome web driver from https://chromedriver.chromium.org/downloads and set its location on the disk in APP_SETTINGS
2- Add your personal Centris link below (APP_SETTINGS object) and run.
3- pip install bs4 selenium lxml (tkinter is part of the Python standard library and cannot be installed with pip; on some Linux distributions it comes from a system package such as python3-tk)
'''
import re
import time
import tkinter
from datetime import datetime
from tkinter import messagebox

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Most recently fetched page source; overwritten on every successful check.
html = None

# User-editable configuration for the checker.
APP_SETTINGS = {
    # Personal Centris link to poll; must be filled in before running.
    'centris_link': '',
    # Path to the chromedriver binary (https://chromedriver.chromium.org/downloads).
    'selenium_chrome_driver_location': './chromedriver',
    # Pause between two consecutive checks.
    'check_timeout_seconds': 300,  # 5 minutes
    # Predicate applied to a property dict (currently only the 'price' key is
    # populated); only properties for which it returns True trigger an alert.
    'property_filter': lambda prop: prop['price'] < 400000,
}

# CSS selector of the element that wraps the list of results on the page.
results_container_selector = '#_ctl0_m_divAsyncPagedDisplays'

# Timeout handed to WebDriverWait while waiting for the results container.
delay = 20
# setup browser (headless)
print(datetime.now().strftime("%H:%M:%S") + " Searching Centris now.")
browser_options = Options()
browser_options.headless = True
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object instead --
# confirm against the installed Selenium version.
browser = webdriver.Chrome(
    APP_SETTINGS['selenium_chrome_driver_location'],
    options=browser_options,
)

# CSS selectors, evaluated relative to a single result element, that locate
# the text used as the property's identifier and the text holding its price.
property_title_selector = 'div.d-wrapperTable>div.row>div.col-lg-7.col-md-6.col-sm-12>div.row>div.col-xs-12.col-sm-12>span'
property_price_selector = 'div.d-wrapperTable>div.row>div.col-lg-7.col-md-6.col-sm-12>div.row>div.col-xs-9.col-sm-8.col-md-8.col-lg-8>span'

# Every property seen so far, keyed by its identifier text.
listing_cache = {}

# True until the first scan has run; while set, no alerts are emitted, so the
# listings already present at start-up only seed the cache.
first_check = True

# hide main window
tkinter.Tk().withdraw()
def _parse_listing(property_html):
    """Extract ``(identifier, details)`` from one result element.

    Returns ``None`` when the element has no title or price node, or when the
    price text contains no digits, so that child elements which are not
    property listings are skipped instead of crashing the monitor.
    """
    title_element = property_html.select_one(property_title_selector)
    price_element = property_html.select_one(property_price_selector)
    if title_element is None or price_element is None:
        return None
    # Keep only the digits of the displayed price before converting it.
    price_digits = re.sub('[^0-9]', '', price_element.text)
    if not price_digits:
        return None
    return title_element.text, {'price': int(price_digits)}


# Poll the Centris link forever; the browser is always shut down on exit
# (Ctrl+C or an unexpected error) so no Chrome/chromedriver process is leaked.
try:
    while True:
        try:
            browser.get(APP_SETTINGS['centris_link'])
            # wait for results to be displayed
            WebDriverWait(browser, delay).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, results_container_selector)
                )
            )
        except TimeoutException:
            print('Loading took too much time!')
        except WebDriverException as error:
            # Any other browser/driver failure: report it and retry on the
            # next cycle instead of ending the monitor.
            print(datetime.now().strftime("%H:%M:%S") + " Could not load the page: " + str(error))
        else:
            html = browser.page_source
            soup = BeautifulSoup(html, 'lxml')
            results_container = soup.select_one(results_container_selector)
            if results_container is not None:
                for property_html in results_container.findChildren('div', recursive=False):
                    parsed = _parse_listing(property_html)
                    if parsed is None:
                        continue
                    property_identifier, property_result = parsed
                    if property_identifier in listing_cache:
                        continue
                    # first time seeing this property
                    listing_cache[property_identifier] = property_result
                    if not first_check and APP_SETTINGS['property_filter'](property_result):
                        # alert for a new property
                        print(datetime.now().strftime("%H:%M:%S") + " New property found: " + property_identifier)
                        # Popup notification is currently disabled; uncomment to enable it.
                        # messagebox.showinfo("New property found!", "There is a new property: " + property_identifier)
                # Only a successful scan ends the warm-up phase; if the first
                # load fails, the next successful scan still just seeds the
                # cache rather than alerting on every existing listing.
                first_check = False
        time.sleep(APP_SETTINGS['check_timeout_seconds'])
finally:
    browser.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment