"""Helpers for scraping Racing Australia race data and preparing its PostgreSQL table."""

import datetime
import re
from pprint import pprint

import psycopg2.extras
import pytz

import _bs
import _html
import database

# Maps an Australian state/territory code to the IANA timezone name used to
# localise race start times for that state.
local_timezones = {
    "NSW": "Australia/Sydney",
    "VIC": "Australia/Melbourne",
    "QLD": "Australia/Brisbane",
    "WA": "Australia/Perth",
    "SA": "Australia/Adelaide",
    "TAS": "Australia/Hobart",
    "ACT": "Australia/Sydney",
    "NT": "Australia/Darwin",
}

# Clock strings such as '1:05PM' or '12:30am': hour, minute, meridiem.
_TIME_PATTERN = re.compile(r'^(\d{1,2}):(\d{2})([AP]M)$', re.IGNORECASE)


def scrape_main_page(row=-1):
    """
    Fetch the Racing Australia home page and extract one row of venues.

    :param row: index forwarded unchanged to ``_bs.get_today_row``
        (default -1). NOTE(review): presumably selects which day's row of
        the weekly table to read -- confirm against ``_bs``.
    :return: whatever ``_bs.get_today_row`` returns; the original docstring
        describes it as a list of RaceDayShort namedtuples.
    """
    this_url = """https://racingaustralia.horse/Home.aspx"""
    this_data = _html.get_page(this_url)
    venues_all = _bs.get_today_row(this_data, row)
    return venues_all


def convert_to_date(weird_string):
    """
    Convert a string like 'MONDAY 15 JUL' to a ``datetime.date``.

    The string carries no year, so the current year in the
    Australia/Sydney timezone is assumed.
    NOTE(review): a date scraped across a year boundary (e.g. a January
    meeting seen in late December) would therefore get the wrong year.

    :param weird_string: '<WEEKDAY> <day> <MON>' text, e.g. 'MONDAY 5 JUL'
    :return: the corresponding ``datetime.date``
    :raises ValueError: if the string does not match '%A %d %b'
    """
    # Zero-pad a single-digit day. The replacement MUST be a raw string:
    # in a normal literal '\1' is the octal escape for chr(1), which
    # corrupted the text and made strptime fail for days 1-9.
    weird_string = re.sub(r' (\d) ', r' 0\1 ', weird_string)
    local_timezone = pytz.timezone('Australia/Sydney')
    now = datetime.datetime.now(local_timezone)
    calculated_date = datetime.datetime.strptime(
        str(now.year) + ' ' + weird_string, "%Y %A %d %b"
    ).date()
    return calculated_date


def convert_to_tz_aware_datetime(date, time, state):
    """
    Build a timezone-aware datetime to be stored as timestamptz in PostgreSQL.

    :param date: object exposing ``year``, ``month`` and ``day``
    :param time: 12-hour clock string such as '1:05PM' (case-insensitive).
        A string that does not match falls back to 00:00.
    :param state: key of ``local_timezones`` (e.g. 'NSW')
    :return: datetime localised to the state's timezone
    :raises KeyError: if ``state`` is not in ``local_timezones``
    :raises ValueError: if the parsed hour/minute are out of range
    """
    tz = pytz.timezone(local_timezones[state])

    hour = 0
    minute = 0
    time_match = _TIME_PATTERN.match(time)
    if time_match:
        hour = int(time_match.group(1))
        minute = int(time_match.group(2))
        if time_match.group(3).lower() == 'pm':
            # 12PM stays 12, 1PM..11PM become 13..23.
            hour = (hour % 12) + 12
        elif hour == 12:
            # 12AM is midnight, not noon.
            hour = 0

    timestamp = datetime.datetime(date.year, date.month, date.day, hour, minute, 0, 0)
    locale_aware_timestamp = tz.localize(timestamp)
    return locale_aware_timestamp


def get_program_data(this_url):
    """
    Retrieve the page at ``this_url`` and split it into races.

    :param this_url: URL passed to ``_html.get_page``
    :return: the result of ``_bs.separate_races`` on the fetched page
    """
    program_page = _html.get_page(this_url)
    races = _bs.separate_races(program_page)
    # NOTE(review): looks like leftover debug output -- kept because
    # callers may rely on seeing it.
    pprint(races)
    return races


def create_json(this_db):
    """
    Create a json file with today's race data.

    NOTE(review): this function appears unfinished. It prepares a query,
    the current Sydney time and a named-tuple cursor, but never executes
    the query, writes any file, or closes the cursor; it returns None.

    :param this_db: connection object exposing ``cursor(cursor_factory=...)``
    :return: None
    """
    query = (
        " SELECT venue, race, start_time, utctime, state "
        "FROM race_program "
        "WHERE race_date = %s;"
    )
    local_timezone = pytz.timezone('Australia/Sydney')
    now = datetime.datetime.now(local_timezone)
    cursor = this_db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)


def setup_database():
    """
    Create the ``race_program`` table if it does not already exist.

    Uses the shared ``database.db`` connection and closes it afterwards,
    whether or not the statement succeeded.

    :return: None
    """
    query = (
        " CREATE TABLE IF NOT EXISTS race_program ( "
        "id SERIAL, "
        "race_date DATE NOT NULL, "
        "venue TEXT NOT NULL, "
        "state TEXT, "
        "race INTEGER, "
        "start_time TIME, "
        "utctime TIMESTAMP WITH TIME ZONE, "
        "UNIQUE (race_date, venue, race)); "
    )
    db = database.db
    try:
        cursor = db.cursor()
        try:
            cursor.execute(query)
            db.commit()
        finally:
            # Release the cursor even when execute/commit raises.
            cursor.close()
    finally:
        db.close()