# _bs.py
  1. from bs4 import BeautifulSoup
  2. import re
  3. import datetime
  4. from pytz import timezone
  5. import model
  6. import collections
  7. # import pytz
"""
Custom bs4 (BeautifulSoup) helpers for analyzing Racing Australia pages.
"""
# Root of the free race-fields pages; scraped relative hrefs are joined onto this.
base_url = 'https://racingaustralia.horse/FreeFields/'
# A venue is identified by its state abbreviation and its display name.
Venue = collections.namedtuple('Venue', 'state, name')
# A race day extends Venue with the row's date label and the scratchings-page URL.
RaceDay = collections.namedtuple('RaceDay', Venue._fields + ('date_string', 'scratchings_url'))
  14. def get_today_row(this_text):
  15. this_soup = BeautifulSoup(this_text, 'html.parser')
  16. rows = this_soup.select('tr.rows')
  17. my_row = rows[2]
  18. cells = my_row.select('td')
  19. i = 0
  20. states = ('NSW', 'VIC', 'QLD', 'WA', 'SA', 'TAS', 'ACT', 'NT')
  21. all_race_days = []
  22. day = 'Unknown'
  23. for cell in cells:
  24. if i == 0:
  25. day = cell.find('span').getText()
  26. # print("date: {}".format(day))
  27. i += 1
  28. continue
  29. venue_text = cell.find('p').getText().strip()
  30. if len(venue_text) > 0:
  31. # print("{}: {}".format(states[i-1], venue_text))
  32. this_a = cell.findAll('a') # .get('href')
  33. for a in this_a:
  34. venue_name = a.getText().strip()
  35. this_venue = Venue(states[i - 1], venue_name)
  36. date_string = day
  37. this_url = a.get('href')
  38. scratchings_url = re.sub(r"/(.*)\.aspx", 'Scratchings.aspx', this_url)
  39. scratchings_url = base_url + scratchings_url
  40. this_race_day = RaceDay(this_venue.state, this_venue.name, date_string, scratchings_url)
  41. all_race_days.append(this_race_day)
  42. i += 1
  43. return all_race_days
  44. def get_meta_data(this_data):
  45. this_soup = BeautifulSoup(this_data, 'html.parser')
  46. this_meta_data = this_soup.select('div.race-venue-bottom')[0].select('div.col2')[0]
  47. last_published_regex = re.compile('Scratchings Last Published: (.+? AEST)')
  48. close_regex = re.compile('Scratching close: (.+? AEST)')
  49. times = ['', 0, '', 0]
  50. time_format = '%a %d-%b-%y %I:%M%p'
  51. aest = timezone('Australia/Brisbane')
  52. if this_meta_data:
  53. this_meta_data = this_meta_data.getText()
  54. match = last_published_regex.search(this_meta_data)
  55. if match:
  56. print(match[1])
  57. times[0] = match[1][:-5]
  58. # times[0] = 'Thu 20-Jun-19 7:42AM'
  59. l_time = datetime.datetime.strptime(times[0], time_format)
  60. # print(aest.localize(l_time))
  61. times[1] = model.convert_to_unixtime(aest.localize(l_time))
  62. print(times[1])
  63. match = close_regex.search(this_meta_data)
  64. if match:
  65. print(match[1])
  66. times[2] = match[1][:-5]
  67. l_time = datetime.datetime.strptime(times[2], time_format)
  68. # print(aest.localize(l_time))
  69. times[3] = model.convert_to_unixtime(aest.localize(l_time))
  70. print(times[3])
  71. return times
  72. # print(this_meta_data)