Parcourir la source

Finished scraping the Racenet race information in races.py

Foppe Hemminga il y a 6 ans
Parent
commit
fbeb204ba6
3 fichiers modifiés avec 31 ajouts et 4 suppressions
  1. 6 2
      _bs.py
  2. 1 1
      model.py
  3. 24 1
      races.py

+ 6 - 2
_bs.py

@@ -23,7 +23,7 @@ RaceDay = collections.namedtuple('RaceDay', RaceDayShort._fields + (
     'scratchings_close_datetime', 'scratchings_close_unixtime'))
 RawScratching = collections.namedtuple('RawScratching', 'venue state date race horse_no horse_display_name')
 Scratching = collections.namedtuple('Scratching', 'venue state date race time utc horse_no horse_display_name torn')
-RacenetRaces = collections.namedtuple('RacenetRaces', 'date venue_name state race_no local_time unix_time')
+RacenetRaces = collections.namedtuple('RacenetRaces', 'race_date venue state race start_time utctime')
 
 
 def get_today_row(this_text, this_row):
@@ -303,12 +303,15 @@ def get_racenet_races(html):
                                 continue
                             race_number = td.get('data-race-number')[1:]
                             start_time = td.get('data-start-time')
+                            local_time = arrow.get(date_parsed.format('YYYY-MM-DD')+' '+time_string, 'YYYY-MM-DD HH:mm').time()
+                            print(start_time)
+                            utc_time = arrow.get(int(start_time)/1000).datetime
                             # print("td.get('data-race-number'): {}". format(race_number))
                             # print("td.get('data-start-time'): {}".format(start_time))
                             # print("time_string: {}".format(time_string))
                             # All data is collected so we can populate the namedTuple
                             racenet_race = RacenetRaces(date_parsed.date(), venue_name, venue_state, race_number,
-                                                        time_string, start_time)
+                                                        local_time, utc_time)
                             venues.append(racenet_race)
                         continue
 
@@ -317,3 +320,4 @@ def get_racenet_races(html):
         sys.exit(1)
     pprint(venues)
     print('{} venues found'.format(len(venues)))
+    return venues

+ 1 - 1
model.py

@@ -53,7 +53,7 @@ def scrape_racenet_races_page():
     this_data = _html.get_page(this_url)
     # print(this_data[:50])
     json = _bs.get_racenet_races(this_data)
-    # return json
+    return json
 
 
 def get_raw_scratchings(this_venue):

+ 24 - 1
races.py

@@ -1,3 +1,26 @@
 import model
+import database
+import arrow
 
-model.scrape_racenet_races_page()
+
+venues = model.scrape_racenet_races_page()
+
+query = """
+    INSERT INTO race_program (
+        race_date, venue, state, race, start_time, utctime)
+    VALUES (%s, %s, %s, %s, %s, %s)
+    ON CONFLICT (utctime) DO UPDATE SET
+        race_date = excluded.race_date,
+        venue = excluded.venue,
+        race = excluded.race,
+        start_time = excluded.start_time,
+        state = excluded.state
+"""
+db = database.db
+cursor = db.cursor()
+if len(venues) > 0:
+    for venue in venues:
+        cursor.execute(query, venue)
+    db.commit()
+cursor.close()
+db.close()