|
@@ -127,19 +127,10 @@ def get_meta_data(this_data, this_venue):
|
|
|
return race_day
|
|
return race_day
|
|
|
|
|
|
|
|
|
|
|
|
|
-def process_scratchings(this_data, this_venue):
|
|
|
|
|
- this_soup = BeautifulSoup(this_data, 'html.parser')
|
|
|
|
|
- try:
|
|
|
|
|
- this_scr = this_soup.select('div.scratchings')[0]
|
|
|
|
|
- except IndexError:
|
|
|
|
|
- return
|
|
|
|
|
- scratchings_count = this_scr.select('table')[0].select('tr')[2].select('td')[3].getText()
|
|
|
|
|
- # print('{}: scratchings_count {}'.format(this_venue.name, scratchings_count))
|
|
|
|
|
- header = this_scr.select('h3', text=re.compile('Scratchings'))[0]
|
|
|
|
|
- div = header.findNext('table')
|
|
|
|
|
|
|
+def scrape_scratchings(div, this_venue):
|
|
|
old_race = 0
|
|
old_race = 0
|
|
|
race = 0
|
|
race = 0
|
|
|
- scratchings = []
|
|
|
|
|
|
|
+ scraped_scratchings = []
|
|
|
for text in div.stripped_strings:
|
|
for text in div.stripped_strings:
|
|
|
if text[:5] == 'Race ':
|
|
if text[:5] == 'Race ':
|
|
|
match = re.search('^Race ([0-9]+):$', text)
|
|
match = re.search('^Race ([0-9]+):$', text)
|
|
@@ -164,7 +155,27 @@ def process_scratchings(this_data, this_venue):
|
|
|
print('The start of the offending line is: {}'.format(text[0:10]))
|
|
print('The start of the offending line is: {}'.format(text[0:10]))
|
|
|
continue
|
|
continue
|
|
|
temp_list = Scratching(this_venue.name, this_venue.state, this_venue.date, race, text)
|
|
temp_list = Scratching(this_venue.name, this_venue.state, this_venue.date, race, text)
|
|
|
- scratchings.append(temp_list)
|
|
|
|
|
|
|
+ scraped_scratchings.append(temp_list)
|
|
|
|
|
+ return scraped_scratchings
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def process_scratchings(this_data, this_venue):
|
|
|
|
|
+ this_soup = BeautifulSoup(this_data, 'html.parser')
|
|
|
|
|
+ try:
|
|
|
|
|
+ this_scr = this_soup.select('div.scratchings')[0]
|
|
|
|
|
+ except IndexError:
|
|
|
|
|
+ return
|
|
|
|
|
+ scratchings_count = this_scr.select('table')[0].select('tr')[2].select('td')[3].getText()
|
|
|
|
|
+ # print('{}: scratchings_count {}'.format(this_venue.name, scratchings_count))
|
|
|
|
|
+ header = this_scr.select('h3', text=re.compile('Scratchings'))[0]
|
|
|
|
|
+ div = header.findNext('table')
|
|
|
|
|
+
|
|
|
|
|
+ scratchings = []
|
|
|
|
|
+ early_scratchings = scrape_scratchings(div, this_venue)
|
|
|
|
|
+ header = this_scr.select('h3', text=re.compile('Late Scratchings'))[0]
|
|
|
|
|
+ div = header.findNext('table')
|
|
|
|
|
+ late_scratchings = scrape_scratchings(div, this_venue)
|
|
|
|
|
+ scratchings = early_scratchings + late_scratchings
|
|
|
# assert len(scratchings) == int(scratchings_count), 'len(scratchings) {} == scratchings_count {}'.format(
|
|
# assert len(scratchings) == int(scratchings_count), 'len(scratchings) {} == scratchings_count {}'.format(
|
|
|
# len(scratchings), scratchings_count)
|
|
# len(scratchings), scratchings_count)
|
|
|
# pprint(scratchings)
|
|
# pprint(scratchings)
|