Pārlūkot izejas kodu

Merge branch 'master' of https://git.fop.pe/Foppe/scratchings

Afwas [1337627 6 gadi atpakaļ
vecāks
revīzija
81bcc315d2
1 mainīts fails ar 23 papildinājumiem un 12 dzēšanām
  1. 23 12
      _bs.py

+ 23 - 12
_bs.py

@@ -127,19 +127,10 @@ def get_meta_data(this_data, this_venue):
     return race_day
 
 
-def process_scratchings(this_data, this_venue):
-    this_soup = BeautifulSoup(this_data, 'html.parser')
-    try:
-        this_scr = this_soup.select('div.scratchings')[0]
-    except IndexError:
-        return
-    scratchings_count = this_scr.select('table')[0].select('tr')[2].select('td')[3].getText()
-    # print('{}: scratchings_count {}'.format(this_venue.name, scratchings_count))
-    header = this_scr.select('h3', text=re.compile('Scratchings'))[0]
-    div = header.findNext('table')
+def scrape_scratchings(div, this_venue):
     old_race = 0
     race = 0
-    scratchings = []
+    scraped_scratchings = []
     for text in div.stripped_strings:
         if text[:5] == 'Race ':
             match = re.search('^Race ([0-9]+):$', text)
@@ -164,7 +155,27 @@ def process_scratchings(this_data, this_venue):
                 print('The start of the offending line is: {}'.format(text[0:10]))
                 continue
             temp_list = Scratching(this_venue.name, this_venue.state, this_venue.date, race, text)
-            scratchings.append(temp_list)
+            scraped_scratchings.append(temp_list)
+    return scraped_scratchings
+
+
+def process_scratchings(this_data, this_venue):
+    this_soup = BeautifulSoup(this_data, 'html.parser')
+    try:
+        this_scr = this_soup.select('div.scratchings')[0]
+    except IndexError:
+        return
+    scratchings_count = this_scr.select('table')[0].select('tr')[2].select('td')[3].getText()
+    # print('{}: scratchings_count {}'.format(this_venue.name, scratchings_count))
+    header = this_scr.select('h3', text=re.compile('Scratchings'))[0]
+    div = header.findNext('table')
+
+    scratchings = []
+    early_scratchings = scrape_scratchings(div, this_venue)
+    header = this_scr.select('h3', text=re.compile('Late Scratchings'))[0]
+    div = header.findNext('table')
+    late_scratchings = scrape_scratchings(div, this_venue)
+    scratchings = early_scratchings + late_scratchings
     # assert len(scratchings) == int(scratchings_count), 'len(scratchings) {} == scratchings_count {}'.format(
     #     len(scratchings), scratchings_count)
     # pprint(scratchings)