"""Scrape race-day scratchings and store the new ones in the ``horses`` table.

When run as a script, the optional first command-line argument is passed
unchanged to ``model.scrape_main_page``. For every race day returned, the
scratchings are compared with the rows already stored for that venue and
date; scratchings that are not stored yet are reported and inserted.
"""
import sys
import time
from pprint import pprint

import psycopg2.extras

import model
import database

SELECT_QUERY = "SELECT * FROM horses WHERE venue = %s AND race_date = %s;"
INSERT_QUERY = (
    "INSERT INTO horses(venue, race_date, race, horse) "
    "VALUES(%s, %s, %s, %s)"
)


def _print_interim(step, start):
    """Print the seconds elapsed since *start*, labelled with *step*."""
    print('interim {} {}'.format(step, time.time() - start))


def _store_new_scratchings(db, cursor, race_day, raw_data):
    """Report the scratchings of *race_day* and insert those not stored yet.

    Args:
        db: Open database connection; committed after every insert.
        cursor: Cursor on *db* whose rows expose ``race_date``, ``venue``,
            ``race`` and ``horse`` as attributes.
        race_day: Processed race day; ``name`` and ``date`` select the
            stored rows to compare against.
        raw_data: Raw scratchings data previously fetched for this race day.
    """
    scratchings = model.get_scratching_details(raw_data, race_day)
    if not scratchings:
        # model.get_scratching_details may return an empty result.
        return

    # Retrieve the scratchings previously stored for this venue / day.
    cursor.execute(SELECT_QUERY, (race_day.name, race_day.date))
    # A list (not a set) keeps plain ``==`` comparison and does not require
    # the field values to be hashable.
    known = [
        (stored.race_date, stored.venue, stored.race, stored.horse)
        for stored in cursor.fetchall()
    ]

    for scratching in scratchings:
        key = (
            scratching.date,
            scratching.venue,
            scratching.race,
            scratching.horse,
        )
        if key in known:
            print(
                'Horse found: date = {}, venue = {}, race = {}, horse = {}'
                .format(*key)
            )
            continue

        # Report and store the new scratching.
        print('New scratching: {} {} race {} horse {}'.format(*key))
        cursor.execute(
            INSERT_QUERY,
            (
                scratching.venue,
                scratching.date,
                scratching.race,
                scratching.horse,
            ),
        )
        db.commit()
        # Remember the insert so a scratching listed twice in the same
        # scrape is not stored a second time.
        known.append(key)


def main():
    """Scrape all race days and synchronise their scratchings with the DB."""
    # NOTE(review): the default is the int 0 while a command-line value stays
    # a str -- confirm which type model.scrape_main_page expects.
    row = sys.argv[1] if len(sys.argv) > 1 else 0

    start = time.time()
    db = database.db
    try:
        race_days_global = model.scrape_main_page(row)
        _print_interim(1, start)

        race_days = []
        raw_data_dict = {}
        for race_day in race_days_global:
            raw_data = model.get_raw_scratchings(race_day)
            race_days.append(model.process_raw_data(raw_data, race_day))
            raw_data_dict[race_day.name] = raw_data
        _print_interim(2, start)
        pprint(race_days)

        cursor = db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
        try:
            for race_day in race_days:
                if not raw_data_dict or not race_day:
                    # raw_data_dict may be empty when there is no data
                    # available (yet).
                    continue
                _store_new_scratchings(
                    db, cursor, race_day, raw_data_dict[race_day.name]
                )
        finally:
            # Release the cursor even when scraping or a query fails.
            cursor.close()
    finally:
        # Always close the connection, including on errors.
        db.close()
    _print_interim(3, start)


if __name__ == '__main__':
    main()