| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- import datetime
- import re
- import pytz
- import _html
- import _bs
- import database
- import psycopg2.extras
- from pprint import pprint
# Maps an Australian state/territory abbreviation to the tz database zone
# name used when localising race start times.
local_timezones = {
    "NSW": "Australia/Sydney",
    "VIC": "Australia/Melbourne",
    "QLD": "Australia/Brisbane",
    "WA": "Australia/Perth",
    "SA": "Australia/Adelaide",
    "TAS": "Australia/Hobart",
    "ACT": "Australia/Sydney",
    "NT": "Australia/Darwin",
}
def scrape_main_page(row=-1):
    """
    Downloads the Racing Australia home page and extracts one row of venues.

    :param row: row selector forwarded unchanged to ``_bs.get_today_row``
        (defaults to -1)
    :return: whatever ``_bs.get_today_row`` produces for that row; the
        original docstring describes it as a list of RaceDayShort namedtuples
    """
    home_page = _html.get_page("""https://racingaustralia.horse/Home.aspx""")
    return _bs.get_today_row(home_page, row)
def convert_to_date(weird_string, today=None):
    """
    Converts a string like 'MONDAY 15 JUL' to a python date object.

    The string carries no year, so the year is chosen such that the
    resulting date lies closest to ``today``. This keeps listings that
    straddle New Year correct (e.g. 'WEDNESDAY 1 JAN' read on 30 December
    resolves to the following year).

    Single-digit days ('MONDAY 5 JUL') are accepted directly by ``%d``; the
    previous zero-padding step used a non-raw ``'\\1'`` replacement, which
    injected a control character and made such strings fail to parse.

    :param weird_string: '<weekday name> <day of month> <month abbreviation>'
    :param today: optional ``datetime.date`` used as the reference day;
        defaults to the current date in the Australia/Sydney time zone
    :return: ``datetime.date``
    :raises ValueError: if the string cannot be parsed for any candidate year
    """
    if today is None:
        today = datetime.datetime.now(pytz.timezone('Australia/Sydney')).date()

    # Collapse runs of whitespace and trim the ends so strptime sees a
    # predictable layout.
    cleaned = ' '.join(weird_string.split())

    candidates = []
    last_error = None
    for year in (today.year - 1, today.year, today.year + 1):
        try:
            parsed = datetime.datetime.strptime(
                "%d %s" % (year, cleaned), "%Y %A %d %b")
        except ValueError as error:
            # e.g. '29 FEB' in a non-leap year, or a genuinely malformed string
            last_error = error
            continue
        candidates.append(parsed.date())

    if not candidates:
        raise ValueError(
            "cannot parse race date %r" % (weird_string,)) from last_error

    return min(candidates, key=lambda candidate: abs(candidate - today))
def convert_to_tz_aware_datetime(date, time, state):
    """
    Creates a timezone-aware datetime (intended to be stored as timestamptz
    in PostgreSQL) from a race date, a 12-hour clock string and a state.

    :param date: object exposing ``year``, ``month`` and ``day``
    :param time: 12-hour clock string such as '1:05PM' or '12:30am'
        (the AM/PM suffix is matched case-insensitively)
    :param state: key of ``local_timezones`` (e.g. 'NSW')
    :return timestamp: datetime localised to the state's time zone; if
        ``time`` does not match the expected pattern the time of day falls
        back to 00:00
    :raises KeyError: if ``state`` is not a key of ``local_timezones``
    """
    tz = pytz.timezone(local_timezones[state])

    hour = 0
    minute = 0
    time_match = re.match(r'^(\d{1,2}):(\d{2})([AP])M$', time, re.IGNORECASE)
    if time_match:
        hour = int(time_match.group(1))
        minute = int(time_match.group(2))
        if time_match.group(3).upper() == 'P':
            # 12:xxPM stays at 12; 1PM..11PM become 13..23
            hour = (hour % 12) + 12
        elif hour == 12:
            # 12:xxAM is just after midnight, not noon
            hour = 0

    timestamp = datetime.datetime(date.year, date.month, date.day, hour, minute, 0, 0)
    # pytz zones must be attached with localize() rather than tzinfo=
    return tz.localize(timestamp)
def get_program_data(this_url):
    """
    Fetches the page at ``this_url`` and splits it into races.

    The parsed result is also pretty-printed to stdout before being returned.

    :param this_url: address of the program page to download
    :return: the value produced by ``_bs.separate_races`` for that page
    """
    races = _bs.separate_races(_html.get_page(this_url))
    pprint(races)
    return races
def create_json(this_db):
    """
    Prepares the pieces needed to export a day's race data as json.

    NOTE: unfinished. The function only builds the SELECT statement, reads
    the current Sydney time and opens a named-tuple cursor; the query is
    never executed, nothing is written, and the function returns None.

    :param this_db: connection object providing ``cursor(cursor_factory=...)``
    :return: None
    """
    cursor = this_db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
    sydney = pytz.timezone('Australia/Sydney')
    now = datetime.datetime.now(sydney)
    query = """
    SELECT venue, race, start_time, utctime, state
    FROM race_program
    WHERE race_date = %s;"""
def setup_database():
    """
    Creates the ``race_program`` table if it does not exist yet.

    Uses the module-level connection ``database.db``, commits the DDL and
    then closes both the cursor and the connection. Cleanup happens in
    ``finally`` blocks so neither is leaked when the statement or the commit
    raises; the exception itself still propagates to the caller.

    :return: None
    """
    query = """
    CREATE TABLE IF NOT EXISTS race_program (
    id SERIAL,
    race_date DATE NOT NULL,
    venue TEXT NOT NULL,
    state TEXT,
    race INTEGER,
    start_time TIME,
    utctime TIMESTAMP WITH TIME ZONE,
    UNIQUE (race_date, venue, race));
    """
    db = database.db
    try:
        cursor = db.cursor()
        try:
            cursor.execute(query)
            db.commit()
        finally:
            cursor.close()
    finally:
        # The original closed the shared connection on success; do the same
        # on failure so a broken transaction is not left open.
        db.close()
|