@@ -0,0 +1,79 @@
+from bs4 import BeautifulSoup
+import re
+import datetime
+from pytz import timezone
+import model
+import collections
+# import pytz
+
+
+"""
+This module contains custom methods, based on bs4.BeautifulSoup, to analyze race-day and scratchings data scraped from racingaustralia.horse.
+"""
+
+base_url = 'https://racingaustralia.horse/FreeFields/'
+Venue = collections.namedtuple('Venue', 'state, name')
+RaceDay = collections.namedtuple('RaceDay', Venue._fields + ('date_string', 'scratchings_url'))
+
+
+def get_today_row(this_text):
+    this_soup = BeautifulSoup(this_text, 'html.parser')
+    rows = this_soup.select('tr.rows')
+    my_row = rows[2]  # row index 2 is assumed to hold today's meetings on the calendar page
+    cells = my_row.select('td')
+    i = 0  # cell counter: cell 0 is the date, cells 1-8 map onto states
+    states = ('NSW', 'VIC', 'QLD', 'WA', 'SA', 'TAS', 'ACT', 'NT')  # column order of the calendar table
+    all_race_days = []
+    day = 'Unknown'
+    for cell in cells:
+        if i == 0:
+            day = cell.find('span').getText()  # first cell carries the date label
+            # print("date: {}".format(day))
+            i += 1
+            continue
+        venue_text = cell.find('p').getText().strip()
+        if len(venue_text) > 0:
+            # print("{}: {}".format(states[i-1], venue_text))
+            this_a = cell.findAll('a')  # one link per meeting in this state
+            for a in this_a:
+                venue_name = a.getText().strip()
+                this_venue = Venue(states[i - 1], venue_name)
+                date_string = day
+                this_url = a.get('href')
+                scratchings_url = re.sub(r"/(.*)\.aspx", 'Scratchings.aspx', this_url)  # swap the linked page for Scratchings.aspx, keeping the query string
+                scratchings_url = base_url + scratchings_url
+                this_race_day = RaceDay(this_venue.state, this_venue.name, date_string, scratchings_url)
+                all_race_days.append(this_race_day)
+        i += 1
+    return all_race_days
+
+
+def get_meta_data(this_data):
+    this_soup = BeautifulSoup(this_data, 'html.parser')
+    this_meta_data = this_soup.select('div.race-venue-bottom')[0].select('div.col2')[0]  # block holding the scratchings timestamps
+    last_published_regex = re.compile('Scratchings Last Published: (.+? AEST)')
+    close_regex = re.compile('Scratching close: (.+? AEST)')
+    times = ['', 0, '', 0]  # [published string, published unixtime, close string, close unixtime]
+    time_format = '%a %d-%b-%y %I:%M%p'  # e.g. 'Thu 20-Jun-19 7:42AM'
+    aest = timezone('Australia/Brisbane')  # Brisbane observes AEST year-round (no daylight saving)
+    if this_meta_data:
+        this_meta_data = this_meta_data.getText()
+        match = last_published_regex.search(this_meta_data)
+        if match:
+            print(match[1])
+            times[0] = match[1][:-5]  # strip the trailing ' AEST'
+            # times[0] = 'Thu 20-Jun-19 7:42AM'
+            l_time = datetime.datetime.strptime(times[0], time_format)
+            # print(aest.localize(l_time))
+            times[1] = model.convert_to_unixtime(aest.localize(l_time))
+            print(times[1])
+        match = close_regex.search(this_meta_data)
+        if match:
+            print(match[1])
+            times[2] = match[1][:-5]  # strip the trailing ' AEST'
+            l_time = datetime.datetime.strptime(times[2], time_format)
+            # print(aest.localize(l_time))
+            times[3] = model.convert_to_unixtime(aest.localize(l_time))
+            print(times[3])
+    return times
+    # print(this_meta_data)
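For review, a minimal sketch of how get_today_row might be driven end to end. It assumes the requests package, a module name of scrape, and a calendar page called Calendar.aspx; none of these appear in the diff, so they are placeholders only.

import requests  # assumed to be available; not a dependency shown in this diff

import scrape  # hypothetical name for the module added in this diff

# Hypothetical calendar page; the page actually fed to get_today_row is not named in the diff.
calendar_url = scrape.base_url + 'Calendar.aspx'

response = requests.get(calendar_url)
response.raise_for_status()

# get_today_row returns a list of RaceDay tuples: (state, name, date_string, scratchings_url)
for race_day in scrape.get_today_row(response.text):
    print(race_day.state, race_day.name, race_day.date_string)
    print('  scratchings page:', race_day.scratchings_url)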
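A similar sketch for get_meta_data, chaining from the first RaceDay returned above. The same placeholders apply, and it assumes model.convert_to_unixtime returns an epoch timestamp, which the code implies but this diff does not show.

import requests  # assumed, as in the previous sketch

import scrape  # hypothetical module name, as above

calendar_url = scrape.base_url + 'Calendar.aspx'  # placeholder page name, as above

race_days = scrape.get_today_row(requests.get(calendar_url).text)
if race_days:
    scratchings_html = requests.get(race_days[0].scratchings_url).text
    # get_meta_data returns [published string, published unixtime, close string, close unixtime]
    published_str, published_ts, close_str, close_ts = scrape.get_meta_data(scratchings_html)
    print('Scratchings last published:', published_str, published_ts)
    print('Scratchings close:', close_str, close_ts)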