| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- import re
- import _html
- import _bs
- import pytz
- import datetime
- # import time
- import psycopg2.extras
- # from pprint import pprint
- import arrow
- import view
- import warnings
# Install an "ignore" filter for arrow's ArrowParseWarning. If arrow.factory
# has no ArrowParseWarning attribute, the resulting AttributeError is
# swallowed and no filter is installed.
try:
    warnings.simplefilter("ignore", arrow.factory.ArrowParseWarning)
except AttributeError:
    pass
- """
- Modules _html and _bs contain specialized methods.
- """
# Maps each Australian state/territory abbreviation to a timezone name.
local_timezones = dict(
    NSW="Australia/Sydney",
    VIC="Australia/Melbourne",
    QLD="Australia/Brisbane",
    WA="Australia/Perth",
    SA="Australia/Adelaide",
    TAS="Australia/Hobart",
    ACT="Australia/Sydney",
    NT="Australia/Darwin",
)
def scrape_racingaustralia_main_page(row):
    """
    Download the Racing Australia home page and pass it to ``_bs.get_today_row``.
    :param row: forwarded unchanged to ``_bs.get_today_row``
    :return: the value returned by ``_bs.get_today_row`` for the downloaded page
    """
    home_page = _html.get_page("""https://racingaustralia.horse/Home.aspx""")
    return _bs.get_today_row(home_page, row)
def scrape_racenet_main_page():
    """
    Download the Racenet scratchings page and pass it to ``_bs.get_racenet_json``.
    The first 50 characters of the downloaded page are printed before parsing.
    :return: the value returned by ``_bs.get_racenet_json``
    """
    page = _html.get_page("""https://www.racenet.com.au/updates/scratchings""")
    print(page[:50])
    return _bs.get_racenet_json(page)
def get_raw_scratchings(this_venue):
    """
    Download the page found at ``this_venue.scratchings_url``.
    :param this_venue: object exposing a ``scratchings_url`` attribute
    :return: the value returned by ``_html.get_page`` for that URL
    """
    return _html.get_page(this_venue.scratchings_url)
def process_raw_data(this_raw_data, this_venue):
    """
    Extract the meta data from a raw Scratchings page.
    Both arguments are handed straight to ``_bs.get_meta_data``.
    :param this_raw_data: raw Scratchings page content
    :param this_venue: venue the page belongs to
    :return: the value returned by ``_bs.get_meta_data``
    """
    return _bs.get_meta_data(this_raw_data, this_venue)
def get_scratching_details(this_raw_data, this_venue):
    """
    Extract the scratchings from an already downloaded Scratchings page.
    Both arguments are handed straight to ``_bs.process_scratchings``.
    :param this_raw_data: raw Scratchings page content
    :param this_venue: venue the page belongs to
    :return: the value returned by ``_bs.process_scratchings``
    """
    return _bs.process_scratchings(this_raw_data, this_venue)
def convert_to_unixtime(dt_object):
    """
    Simple utility function that returns the unixtime (whole seconds since
    1970-01-01 00:00:00 UTC) from a timezone aware dateTime object.
    :param dt_object: timezone aware ``datetime.datetime``; a naive value is
        interpreted as system local time by ``datetime.timestamp()``
    :return: seconds since the epoch as an int, fractional seconds truncated
        toward zero
    """
    # For an aware datetime, timestamp() is defined as
    # (dt - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds(),
    # which is exactly the arithmetic previously spelled out by hand.
    return int(dt_object.timestamp())
def convert_to_date(weird_string, year=None):
    """
    Converts a string like 'MONDAY 15 JUL' to a python date object.

    The string carries no year, so the year must be supplied or assumed.
    :param weird_string: '<WEEKDAY> <DAY> <MONTH ABBREVIATION>', e.g. 'MONDAY 5 JUL'
    :param year: year to combine with the string; when omitted, the current
        year in the Australia/Sydney timezone is used
    :return: datetime.date object
    :raises ValueError: if the string does not match the '%A %d %b' layout
    """
    if year is None:
        year = datetime.datetime.now(pytz.timezone('Australia/Sydney')).year
    # Zero-pad a single-digit day. The replacement MUST be a raw string:
    # in a plain literal '\1' is the control character chr(1), not a
    # backreference, which made every single-digit day unparseable.
    padded = re.sub(r' (\d) ', r' 0\1 ', weird_string)
    parsed = datetime.datetime.strptime('{} {}'.format(year, padded), "%Y %A %d %b")
    return parsed.date()
def send_messages(scratches, source):
    """
    Format one line per scratching and broadcast them via ``view.broadcast``,
    batching lines (newline separated) so each broadcast stays under the
    length limit.

    Each formatted line is also printed. A line that does not fit in the
    current batch causes that batch to be broadcast and starts the next one.
    :param scratches: iterable of records exposing ``torn``, ``date``,
        ``state``, ``venue``, ``race``, ``time``, ``utc``, ``horse_no`` and
        ``horse_display_name``
    :param source: label placed at the start of every line
    :return: None
    """
    # Batches are kept below this size (messages are limited to 6k characters).
    max_length = 5997
    message_string = '{} {}venue = {} {} {}-{} | race = {} starts at {} | {} UTC | horse = {}'
    long_message = ''
    for m in scratches:
        flag = 'FLAGGED!! ' if m.torn else ''
        message = message_string.format(source,
                                        flag,
                                        m.date.strftime('%a'),
                                        m.date,
                                        m.state,
                                        m.venue,
                                        m.race,
                                        m.time,
                                        m.utc,
                                        '{} {}'.format(m.horse_no, m.horse_display_name))
        print('this_message: {}'.format(message))
        if len(long_message) + len(message) < max_length:
            # Append message to the current batch
            long_message = long_message + '\n' + message if long_message else message
        else:
            # Flush the full batch; skip the broadcast when the batch is
            # empty (the current line alone exceeds the limit).
            if long_message:
                print('Sending very long message > {}'.format(len(long_message)))
                view.broadcast(long_message)
            # Best would be to now store horses that were just broadcast
            # Start the next batch with the formatted text. Previously the
            # record `m` itself was stored here, which broke the following
            # length check / concatenation / broadcast.
            long_message = message
    # Send whatever is left in the final batch
    if long_message:
        print('Sending long_message > {}'.format(len(long_message)))
        view.broadcast(long_message)
def store_scratched_horses(db, full_scratches):
    """
    Insert scratchings into the ``horses`` table and return the ones that
    were actually inserted.

    Rows that hit the ``ON CONFLICT ... DO NOTHING`` clause are not returned.
    The transaction is committed after all rows have been processed; if an
    execute fails, the cursor is still closed and the exception propagates
    without a commit.
    :param db: open database connection (psycopg2-style ``cursor()`` / ``commit()``)
    :param full_scratches: iterable of records exposing ``venue``, ``date``,
        ``race``, ``horse_no`` and ``horse_display_name``
    :return: list of the records from ``full_scratches`` whose INSERT
        reported at least one stored row
    """
    query = """INSERT INTO horses(venue, race_date, race, horse_no, horse_display_name)
    VALUES(%s, %s, %s, %s, %s)
    ON CONFLICT(venue, race_date, race, horse_no) DO NOTHING;"""
    # The status message looks like 'INSERT <oid> <rows>'; capture <rows>.
    inserted_rows = re.compile(r'^INSERT \d+ (\d+)$')
    scratches_to_return = []
    cur3 = db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
    try:
        for this_scratching in full_scratches:
            database_entry = (this_scratching.venue, this_scratching.date,
                              this_scratching.race, this_scratching.horse_no,
                              this_scratching.horse_display_name)
            cur3.execute(query, database_entry)
            match = inserted_rows.match(cur3.statusmessage)
            status = int(match.group(1)) if match else 0
            if status > 0:
                print('Stored: {}'.format(database_entry))
                print(cur3.statusmessage)
                scratches_to_return.append(this_scratching)
    finally:
        # Release the cursor even when an execute raises.
        cur3.close()
    db.commit()
    return scratches_to_return
def get_race_from_races(this_haystack, needle_date, needle_venue, needle_race):
    """
    Look up a race in previously fetched race rows.

    Returns the first row whose ``race_date``, ``venue`` and ``race`` all
    equal the requested values.
    :param this_haystack: iterable of race rows exposing ``race_date``,
        ``venue``, ``race``, ``start_time``, ``utctime`` and ``torn``
    :param needle_date: date to look for, parsed with arrow as 'YYYY-MM-DD'
    :param needle_venue: venue to look for
    :param needle_race: race to look for
    :return: ``(start_time, utctime, torn)`` of the first matching row, or an
        empty tuple when nothing matches
    """
    wanted_date = arrow.get(needle_date, 'YYYY-MM-DD').date()
    hits = (
        (candidate.start_time, candidate.utctime, candidate.torn)
        for candidate in this_haystack
        if candidate.race_date == wanted_date
        and candidate.venue == needle_venue
        and candidate.race == needle_race
    )
    return next(hits, ())
def get_relevant_races_from_database(db):
    """
    Fetch every row of ``race_program`` whose ``race_date`` is today or later.

    The query is run once; callers search the returned rows in memory.
    :param db: open database connection (psycopg2-style ``cursor()``)
    :return: list of named tuples with the fields ``venue``, ``start_time``,
        ``race_date``, ``utctime``, ``race`` and ``torn``
    """
    query = """
    SELECT venue, start_time, race_date, utctime, race, torn FROM race_program
    WHERE race_date >= %s;
    """
    # NOTE(review): "today" is the UTC calendar date, which can differ from
    # the local Australian date - confirm this is intended.
    today = arrow.utcnow().date()
    cur_races = db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
    try:
        cur_races.execute(query, (today,))
        return cur_races.fetchall()
    finally:
        # Release the cursor even when the query raises.
        cur_races.close()
|