"""Fetch the EOJHL standings and schedule JSONP feeds and write them out as JSON files."""

import glob
import json
import os
import re
import shutil
from datetime import datetime

# --- FEED URLS ---
STANDINGS_URL = (
    "https://lscluster.hockeytech.com/feed/index.php?"
    "feed=statviewfeed&view=teams&groupTeamsBy=division&context=overall"
    "&site_id=2&season=110&special=false&key=1defb601c9b37c24"
    "&client_code=eojhl&league_id=2&conference=-1&division=-1"
    "&sort=points&lang=en&callback=angular.callbacks._4"
)
SCHEDULE_URL = (
    "https://lscluster.hockeytech.com/feed/index.php?"
    "feed=statviewfeed&view=schedule&team=-1&season=110&month=-1&location=homeaway"
    "&key=1defb601c9b37c24&client_code=eojhl&site_id=2&league_id=2"
    "&conference_id=-1&division_id=-1&lang=en&callback=angular.callbacks._4"
)

STANDINGS_FILE = "eojhl_standings.json"
SCHEDULE_FILE = "eojhl_scoreboard.json"
MAX_BACKUPS = 5
# Seconds to wait for the feed before giving up; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# --- TEAM ABBREVIATION MAP ---
# This maps the 'city' name from the raw data to the required abbreviation.
TEAM_ABBR_MAP = {
    "Ottawa": "OTT",
    "Carleton Place": "CPC",
    "Ottawa West": "OW",
    "Richmond": "RCH",
    "Casselman": "CAS",
    "Smiths Falls": "SF",
    "Embrun": "EMB",
    "Perth": "PER",
    "Glengarry": "GB",
    "Arnprior": "ARP",
    "Athens": "ATH",
    "Renfrew": "REN",
    "Winchester": "WIN",
}

# --- PATTERNS ---
# Captures the payload inside the angular.callbacks._N( ... ) JSONP wrapper.
_JSONP_RE = re.compile(r"angular\.callbacks\._\d+\s*\((.*)\);?\s*$", re.DOTALL)
# A clock time followed by am/pm, e.g. "7:30 pm" -- marks a scheduled game.
_CLOCK_TIME_RE = re.compile(r"(\d{1,2}:\d{2})\s*(pm|am)", re.IGNORECASE)
# Either an ordinal period ("3rd Period") or a two-digit game clock ("12:00").
_IN_PROGRESS_RE = re.compile(r"(\d(st|nd|rd|th) Period|\d{2}:\d{2})")


# --- HELPERS ---
def backup_file(filename):
    """Copy *filename* to a timestamped ``.bak`` file and prune old backups.

    Does nothing when *filename* does not exist. After copying, only the
    ``MAX_BACKUPS`` newest ``<filename>.<timestamp>.bak`` files are kept.
    """
    if not os.path.exists(filename):
        return

    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    shutil.copy2(filename, f"{filename}.{ts}.bak")

    # Timestamps sort lexicographically, so a reverse sort puts the newest
    # backups first. The filename is escaped so glob metacharacters in it
    # are matched literally.
    backups = sorted(glob.glob(f"{glob.escape(filename)}.*.bak"), reverse=True)
    for old_backup in backups[MAX_BACKUPS:]:
        os.remove(old_backup)


def _decode_jsonp(text):
    """Strip the angular.callbacks JSONP wrapper from *text* and decode the JSON.

    Returns the decoded list, or None (after printing the reason) when the
    wrapper is missing, the payload is not valid JSON, or it is not a list.
    """
    match = _JSONP_RE.search(text)
    if not match:
        print("Error: Could not find or strip the JSONP wrapper.")
        return None

    try:
        data = json.loads(match.group(1).strip())
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from raw data string: {e}")
        return None

    if not isinstance(data, list):
        print("Error: Decoded JSON data is not a list (expected format).")
        return None

    print("Successfully stripped JSONP and decoded data.")
    return data


def fetch_jsonp(url, timeout=REQUEST_TIMEOUT):
    """Download a JSONP feed from *url* and return its decoded list payload.

    Args:
        url: Feed URL whose response is wrapped in ``angular.callbacks._N(...)``.
        timeout: Seconds to wait for the server before aborting the request.

    Returns:
        The decoded list, or None when the request fails, the server answers
        with an HTTP error status, or the body cannot be decoded.
    """
    # Imported here so the parsing/transform functions in this module remain
    # importable on machines where the HTTP client is not installed.
    import requests

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error: HTTP request failed: {e}")
        return None

    return _decode_jsonp(response.text)


def parse_game_date(raw_date, raw_status, year=2025):
    """Build an ISO 8601 style timestamp from the feed's date and status text.

    Args:
        raw_date: Month/day text such as "Sep. 16" (periods are ignored).
        raw_status: Status text; a clock time such as "7:30 pm EST" supplies
            the time of day, anything else (e.g. "Final") falls back to 19:30.
        year: Year applied to every game; the feed text carries none.

    Returns:
        A string like "2025-09-16T19:30:00Z", or None when the date cannot
        be parsed.

    NOTE(review): the clock time is emitted unchanged with a trailing "Z"
    even though the sample status is labelled "EST" -- no timezone conversion
    is done. Confirm that consumers expect this. The single *year* is also
    applied to every month, so games after the new year need a different
    *year* passed in.
    """
    if not isinstance(raw_date, str):
        return None

    cleaned_raw_date = raw_date.replace(".", "").strip()
    status_text = raw_status if isinstance(raw_status, str) else ""

    time_match = _CLOCK_TIME_RE.search(status_text)
    if time_match:
        # Scheduled game: combine the date with the advertised start time.
        time_str = f"{time_match.group(1)}{time_match.group(2).upper()}"
        stamp = f"{cleaned_raw_date} {year} {time_str}"
        in_format = "%b %d %Y %I:%M%p"
        out_format = "%Y-%m-%dT%H:%M:00Z"
    else:
        # No start time available (e.g. completed game): default to 7:30 PM.
        stamp = f"{cleaned_raw_date} {year}"
        in_format = "%b %d %Y"
        out_format = "%Y-%m-%dT19:30:00Z"

    try:
        return datetime.strptime(stamp, in_format).strftime(out_format)
    except ValueError:
        # Unparseable date text: report "no date" rather than aborting the run.
        return None


def get_status_details(game_status):
    """Translate a raw status string into the scoreboard status structure.

    Args:
        game_status: Feed status text, e.g. "Final", "Final OT", "7:30 pm EST".

    Returns:
        A dict with keys ``name``, ``state``, ``shortDetail``, ``period`` and
        ``displayClock``. Finals are ``post`` (period 3, or 4 for OT/SO), a
        status that looks like live play is ``in``, everything else is ``pre``.
    """
    text = game_status if isinstance(game_status, str) else ""
    status = {
        "name": "STATUS_SCHEDULED",
        "state": "pre",
        "shortDetail": game_status,
        "period": 0,
        "displayClock": "00:00",
    }

    if "Final" in text:
        status["state"] = "post"
        status["name"] = "STATUS_FINAL"
        status["period"] = 3
        status["shortDetail"] = "Final"
        if "OT" in text:
            status["shortDetail"] = "Final OT"
            status["name"] = "STATUS_FINAL_OVERTIME"
            status["period"] = 4
        elif "SO" in text:
            status["shortDetail"] = "Final SO"
            status["name"] = "STATUS_FINAL_SHOOTOUT"
            status["period"] = 4
    elif not _CLOCK_TIME_RE.search(text) and _IN_PROGRESS_RE.search(text):
        # In progress (assuming live data would show "3rd Period - 12:00").
        # Statuses carrying an am/pm time are scheduled start times such as
        # "12:00 pm EST"; they must not be mistaken for a running game clock.
        status["state"] = "in"
        status["name"] = "STATUS_IN_PROGRESS"
        status["shortDetail"] = game_status
        # Period and clock are not parsed from live statuses; they stay at
        # 0 / "00:00" until a real in-progress sample is available.

    return status


def _clean_score(score):
    """Return *score* as a string, mapping missing, empty or '-' values to '0'."""
    if not score or score == "-":
        return "0"
    return str(score)


def _team_link(game_prop, key):
    """Return ``game_prop[key]['teamLink']`` or None when any level is missing."""
    entry = game_prop.get(key)
    return entry.get("teamLink") if isinstance(entry, dict) else None


def _build_competitor(city, team_id, score, home_away):
    """Build one entry of an event's ``competitors`` list."""
    abbr = TEAM_ABBR_MAP.get(city)
    return {
        "id": team_id,
        "homeAway": home_away,
        "team": {
            "id": team_id,
            "abbreviation": abbr,
            "name": city,
            "logo": f"assets/sports/eojhl_logos/{abbr}.png" if abbr else None,
        },
        "score": _clean_score(score),
        "records": [{"summary": "0-0-0"}],
    }


def transform_schedule(raw_data):
    """Convert the decoded schedule feed into ``{"events": [...]}``.

    Args:
        raw_data: Decoded feed; games are read from
            ``raw_data[0]["sections"][0]["data"]``, each with a ``row`` dict
            (game fields) and a ``prop`` dict (team links).

    Returns:
        A dict with an ``events`` list. Games lacking an id, either city or a
        date are skipped; an unreadable structure yields an empty list.
    """
    try:
        # Path: raw_data[0] -> "sections" (list) -> sections[0] (dict) -> "data" (list of games)
        game_list = raw_data[0].get("sections", [{}])[0].get("data")
    except (TypeError, IndexError, KeyError, AttributeError):
        print("Error: Could not safely extract 'data' list from raw data structure.")
        return {"events": []}

    if not game_list:
        print("Error: 'data' list is empty or None.")
        return {"events": []}

    events = []
    for game_data in game_list:
        if not isinstance(game_data, dict):
            continue

        # The game data is stored in the 'row' key, team IDs are in 'prop'.
        # Null or non-dict values are treated as empty rather than crashing.
        game_row = game_data.get("row")
        game_prop = game_data.get("prop")
        if not isinstance(game_row, dict):
            game_row = {}
        if not isinstance(game_prop, dict):
            game_prop = {}

        home_city = game_row.get("home_team_city")
        away_city = game_row.get("visiting_team_city")
        game_id = game_row.get("game_id")
        raw_date = game_row.get("date")
        game_status = game_row.get("game_status") or "TBA"

        # Skip if essential data is missing
        if not all([game_id, home_city, away_city, raw_date]):
            continue

        status_details = get_status_details(game_status)

        events.append({
            "id": game_id,
            "date": parse_game_date(raw_date, game_status),
            "competitions": [{
                "status": {
                    "type": {
                        "name": status_details["name"],
                        "state": status_details["state"],
                        "shortDetail": status_details["shortDetail"],
                    },
                    "period": status_details["period"],
                    "displayClock": status_details["displayClock"],
                },
                "competitors": [
                    _build_competitor(
                        home_city,
                        _team_link(game_prop, "home_team_city"),
                        game_row.get("home_goal_count"),
                        "home",
                    ),
                    _build_competitor(
                        away_city,
                        _team_link(game_prop, "visiting_team_city"),
                        game_row.get("visiting_goal_count"),
                        "away",
                    ),
                ],
            }],
        })

    return {"events": events}


# --- The main block (Placeholder for transform_standings) ---
def transform_standings(raw_data):
    """Placeholder: ignores *raw_data* and returns an empty standings document."""
    # This is a placeholder; you'll need your actual standings logic here.
    return {"standings": []}


def _write_json(filename, payload):
    """Back up the existing *filename*, then overwrite it with *payload* as JSON."""
    # Backing up only when a write is about to happen keeps failed runs from
    # rotating older backups out of the MAX_BACKUPS window.
    backup_file(filename)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def main():
    """Fetch both feeds, transform them, and write the output files."""
    print(f"Starting data fetch at {datetime.now().isoformat()}")

    # Fetch and transform schedule data
    schedule_raw = fetch_jsonp(SCHEDULE_URL)
    schedule = transform_schedule(schedule_raw) if schedule_raw else {"events": []}

    # Fetch and transform standings data; a failed fetch leaves the file alone
    standings_raw = fetch_jsonp(STANDINGS_URL)
    standings = transform_standings(standings_raw) if standings_raw else None

    # Write output to file
    if schedule and schedule['events']:
        _write_json(SCHEDULE_FILE, schedule)
        print(f"Successfully wrote {len(schedule['events'])} events to {SCHEDULE_FILE}")
    else:
        print(f"Failed to transform schedule data. {SCHEDULE_FILE} not updated.")

    if standings:
        _write_json(STANDINGS_FILE, standings)
        print(f"Successfully wrote standings to {STANDINGS_FILE}")
    else:
        print(f"Failed to fetch standings data. {STANDINGS_FILE} not updated.")


if __name__ == "__main__":
    main()