Initial Commit

This commit is contained in:
2026-02-13 12:22:06 -05:00
parent 43fdccf67b
commit 8fec41e3e6
612 changed files with 1313484 additions and 2 deletions

277
data/get_eojhl_data.py Normal file
View File

@@ -0,0 +1,277 @@
import requests, json, re, os, shutil, glob
from datetime import datetime
# --- FEED ENDPOINTS AND OUTPUT FILES ---
# Both feeds come from the same hockeytech.com endpoint; only the query string
# differs. Parameters are listed one per line so a single value (for example
# the season id) is easy to locate and change. The "callback" parameter makes
# the server wrap its JSON in an angular.callbacks._N(...) JSONP call.
_FEED_ENDPOINT = "https://lscluster.hockeytech.com/feed/index.php?"

STANDINGS_URL = _FEED_ENDPOINT + "&".join([
    "feed=statviewfeed",
    "view=teams",
    "groupTeamsBy=division",
    "context=overall",
    "site_id=2",
    "season=110",
    "special=false",
    "key=1defb601c9b37c24",
    "client_code=eojhl",
    "league_id=2",
    "conference=-1",
    "division=-1",
    "sort=points",
    "lang=en",
    "callback=angular.callbacks._4",
])

SCHEDULE_URL = _FEED_ENDPOINT + "&".join([
    "feed=statviewfeed",
    "view=schedule",
    "team=-1",
    "season=110",
    "month=-1",
    "location=homeaway",
    "key=1defb601c9b37c24",
    "client_code=eojhl",
    "site_id=2",
    "league_id=2",
    "conference_id=-1",
    "division_id=-1",
    "lang=en",
    "callback=angular.callbacks._4",
])

# Output files written by the main block (relative to the working directory).
STANDINGS_FILE = "eojhl_standings.json"
SCHEDULE_FILE = "eojhl_scoreboard.json"

# Number of timestamped .bak copies kept per output file.
MAX_BACKUPS = 5
# --- TEAM ABBREVIATIONS ---
# Lookup from a team's city name, as it appears in the schedule feed's
# home/visiting team city fields, to the short code used in the output
# (the code is also used to build the logo file name).
# Cities missing from this table get no abbreviation and no logo.
TEAM_ABBR_MAP = {
    "Ottawa": "OTT",
    "Carleton Place": "CPC",
    "Ottawa West": "OW",
    "Richmond": "RCH",
    "Casselman": "CAS",
    "Smiths Falls": "SF",
    "Embrun": "EMB",
    "Perth": "PER",
    "Glengarry": "GB",
    "Arnprior": "ARP",
    "Athens": "ATH",
    "Renfrew": "REN",
    "Winchester": "WIN",
}
# --- HELPERS ---
def backup_file(filename, max_backups=None):
    """Copy *filename* to a timestamped ``.bak`` sibling and prune old backups.

    The backup is named ``<filename>.<YYYYmmdd_HHMMSS>.bak``. Because the
    timestamp sorts lexicographically in chronological order, sorting the
    backup names in reverse puts the newest first; everything after the first
    ``max_backups`` entries is deleted.

    Args:
        filename: Path of the file to back up. If it does not exist the
            function does nothing.
        max_backups: How many backups to keep (newest first). Defaults to the
            module-level ``MAX_BACKUPS`` when omitted.

    Returns:
        None.
    """
    if not os.path.exists(filename):
        return

    if max_backups is None:
        max_backups = MAX_BACKUPS

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    shutil.copy2(filename, f"{filename}.{stamp}.bak")

    # Escape the literal part of the pattern: without this, a path containing
    # glob metacharacters ("[", "]", "*", "?") would not match its own
    # backups and old copies would never be pruned.
    pattern = f"{glob.escape(filename)}.*.bak"
    backups = sorted(glob.glob(pattern), reverse=True)
    for stale in backups[max_backups:]:
        os.remove(stale)
def fetch_jsonp(url, timeout=15):
    """Download a JSONP feed and return the decoded JSON payload.

    The response body is expected to look like
    ``angular.callbacks._<N>( <json> );``. The wrapper is stripped with a
    regex and the remainder is parsed as JSON.

    Args:
        url: Feed URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The decoded payload when it is a list, otherwise ``None``. Every
        failure (network error, HTTP error status, missing wrapper, invalid
        JSON, non-list payload) is reported with ``print`` and yields ``None``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures instead of trying to parse an
        # error page as JSONP.
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        print(f"Error: Request for feed data failed: {exc}")
        return None

    # Greedy (.*) with DOTALL captures everything between the first "(" after
    # the callback name and the final ")" at the end of the body.
    match = re.search(
        r"angular\.callbacks\._\d+\s*\((.*)\);?\s*$", response.text, re.DOTALL
    )
    if not match:
        print("Error: Could not find or strip the JSONP wrapper.")
        return None

    try:
        data = json.loads(match.group(1).strip())
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from raw data string: {e}")
        return None

    if not isinstance(data, list):
        print("Error: Decoded JSON data is not a list (expected format).")
        return None

    print("Successfully stripped JSONP and decoded data.")
    return data
def parse_game_date(raw_date, raw_status, season_start_year=2025, season_start_month=8):
    """Build an ISO-8601-style timestamp string from the feed's date and status.

    ``raw_date`` carries only month and day (e.g. ``"Sep. 16"``), so the year
    has to be inferred. A season spans two calendar years: months from
    ``season_start_month`` onward belong to ``season_start_year`` and earlier
    months (January, February, ...) belong to the following year. Previously
    every game was stamped with 2025, which dated second-half games before
    the season had started.

    Args:
        raw_date: Month/day text from the feed, e.g. ``"Sep. 16"``.
        raw_status: Status text; a start time such as ``"7:30 pm EST"`` is
            used when present, otherwise (e.g. ``"Final"``) 19:30 is assumed.
        season_start_year: Calendar year in which the season begins.
        season_start_month: First month (1-12) counted as part of
            ``season_start_year``; earlier months roll over to the next year.
            NOTE(review): 8 (August) is an assumption about the league
            calendar - confirm against the real schedule.

    Returns:
        A string like ``"2025-09-16T19:30:00Z"``, or ``None`` when the date
        (or time) cannot be parsed.

    NOTE(review): the clock time is copied from the feed as-is (the sample
    status says "EST") but is emitted with a "Z" suffix and no timezone
    conversion. Kept unchanged for output compatibility - confirm what the
    consumer of this file expects.
    """
    if not raw_date:
        return None

    # "Sep. 16" -> "Sep 16" so that %b can parse the month abbreviation.
    cleaned_date = str(raw_date).replace(".", "").strip()

    try:
        month = datetime.strptime(cleaned_date.split()[0], "%b").month
    except (IndexError, ValueError):
        return None
    year = season_start_year + 1 if month < season_start_month else season_start_year

    # Scheduled games carry their start time in the status text.
    time_match = re.search(
        r"(\d{1,2}:\d{2})\s*(pm|am)", str(raw_status or ""), re.IGNORECASE
    )
    try:
        if time_match:
            clock = f"{time_match.group(1)}{time_match.group(2).upper()}"
            parsed = datetime.strptime(
                f"{cleaned_date} {year} {clock}", "%b %d %Y %I:%M%p"
            )
            return parsed.strftime("%Y-%m-%dT%H:%M:00Z")

        # No start time available (e.g. completed game): use 7:30 PM.
        parsed = datetime.strptime(f"{cleaned_date} {year}", "%b %d %Y")
        return parsed.strftime("%Y-%m-%dT19:30:00Z")
    except ValueError:
        # Unparseable day or time: report "unknown" rather than guessing.
        return None
def get_status_details(game_status):
    """Translate the feed's status text into the scoreboard status fields.

    Args:
        game_status: Status text such as ``"Final"``, ``"Final OT"``,
            ``"7:30 pm EST"`` or ``"3rd Period - 12:00"``. Non-string values
            (e.g. ``None``) are treated as an empty status.

    Returns:
        A dict with the keys ``name``, ``state``, ``shortDetail``, ``period``
        and ``displayClock``:

        * text containing ``"Final"`` -> state ``"post"`` (period 3, or 4 for
          overtime / shootout finals);
        * a period marker or a bare ``MM:SS`` clock -> state ``"in"``;
        * anything else, including scheduled start times -> state ``"pre"``.

        For in-progress games ``period`` and ``displayClock`` stay at their
        defaults (0 and ``"00:00"``); the live status text is not parsed.
    """
    text = game_status if isinstance(game_status, str) else ""

    status = {
        "name": "STATUS_SCHEDULED",
        "state": "pre",
        "shortDetail": text,
        "period": 0,
        "displayClock": "00:00",
    }

    if "Final" in text:
        status["state"] = "post"
        status["name"] = "STATUS_FINAL"
        status["period"] = 3
        status["shortDetail"] = "Final"
        if "OT" in text:
            status["shortDetail"] = "Final OT"
            status["name"] = "STATUS_FINAL_OVERTIME"
            status["period"] = 4
        elif "SO" in text:
            status["shortDetail"] = "Final SO"
            status["name"] = "STATUS_FINAL_SHOOTOUT"
            status["period"] = 4
        # A finished game is never reported as in progress.
        return status

    # In progress: an ordinal period marker, or a two-digit clock that is NOT
    # followed by am/pm. Without the lookahead, scheduled start times such as
    # "12:00 pm EST" or "10:30 am EST" were misreported as live games.
    if re.search(r"\d(st|nd|rd|th) Period|\d{2}:\d{2}(?!\s*[aApP]\.?[mM])", text):
        status["state"] = "in"
        status["name"] = "STATUS_IN_PROGRESS"
        status["shortDetail"] = text

    return status
def _normalize_score(value):
    """Return *value*, or '0' when it is empty or the '-' placeholder."""
    if not value or value == '-':
        return '0'
    return value


def _make_competitor(side, team_id, city, score):
    """Build one entry of an event's "competitors" list.

    ``side`` is "home" or "away"; the abbreviation (and from it the logo
    path) is looked up from the city name and is None for unknown cities.
    """
    abbr = TEAM_ABBR_MAP.get(city)
    logo = f"assets/sports/eojhl_logos/{abbr}.png" if abbr else None
    return {
        "id": team_id,
        "homeAway": side,
        "team": {
            "id": team_id,
            "abbreviation": abbr,
            "name": city,
            "logo": logo,
        },
        "score": _normalize_score(score),
        "records": [{"summary": "0-0-0"}],
    }


def transform_schedule(raw_data):
    """Convert the raw schedule feed into ``{"events": [...]}``.

    The games are read from ``raw_data[0]["sections"][0]["data"]``. Each game
    entry holds its display values under "row" and link metadata (including
    the team ids) under "prop". Games missing an id, either team city, or a
    date are skipped.

    Returns:
        A dict with a single "events" key. The list is empty when the
        structure cannot be navigated or contains no games; a message is
        printed in either case.
    """
    try:
        first_entry = raw_data[0]
        first_section = first_entry.get("sections", [{}])[0]
        games = first_section.get("data")
    except (TypeError, IndexError, AttributeError):
        # raw_data is not indexable, "sections" is empty, or a level of the
        # structure is not a dict.
        print("Error: Could not safely extract 'data' list from raw data structure.")
        return {"events": []}

    if not games:
        print("Error: 'data' list is empty or None.")
        return {"events": []}

    events = []
    for entry in games:
        row = entry.get("row", {})
        prop = entry.get("prop", {})

        game_id = row.get("game_id")
        home_city = row.get("home_team_city")
        away_city = row.get("visiting_team_city")
        raw_date = row.get("date")

        # An event is only usable with an id, both teams, and a date.
        if not (game_id and home_city and away_city and raw_date):
            continue

        game_status = row.get("game_status", "TBA")

        # Team ids live under prop -> <city field name> -> "teamLink".
        home_team_id = prop.get("home_team_city", {}).get("teamLink")
        away_team_id = prop.get("visiting_team_city", {}).get("teamLink")

        status = get_status_details(game_status)
        game_date = parse_game_date(raw_date, game_status)

        home = _make_competitor(
            "home", home_team_id, home_city, row.get("home_goal_count", '0')
        )
        away = _make_competitor(
            "away", away_team_id, away_city, row.get("visiting_goal_count", '0')
        )

        events.append({
            "id": game_id,
            "date": game_date,
            "competitions": [{
                "status": {
                    "type": {
                        "name": status["name"],
                        "state": status["state"],
                        "shortDetail": status["shortDetail"],
                    },
                    "period": status["period"],
                    "displayClock": status["displayClock"],
                },
                "competitors": [home, away],
            }],
        })

    return {"events": events}
# --- STANDINGS TRANSFORM (placeholder) ---
def transform_standings(raw_data):
    """Return an empty standings payload; ``raw_data`` is currently ignored.

    Stub only: the real standings transformation still has to be written.
    It exists so the main block has a function to call.
    """
    placeholder = {"standings": []}
    return placeholder
def _write_json(path, payload):
    """Back up *path* (if it exists), then overwrite it with *payload* as JSON."""
    # Back up only when a write is about to happen, so that runs which fail
    # to fetch data do not rotate good backups out with identical copies.
    backup_file(path)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


if __name__ == "__main__":
    print(f"Starting data fetch at {datetime.now().isoformat()}")

    # Fetch and transform schedule data.
    schedule_raw = fetch_jsonp(SCHEDULE_URL)
    schedule = transform_schedule(schedule_raw) if schedule_raw else {"events": []}

    # Fetch and transform standings data. A failed fetch must not reach the
    # transform (or the file): previously the result was written regardless,
    # overwriting the standings file even when no data had been downloaded.
    standings_raw = fetch_jsonp(STANDINGS_URL)
    standings = transform_standings(standings_raw) if standings_raw else None

    # Write output files, leaving the existing file alone on failure.
    if schedule and schedule['events']:
        _write_json(SCHEDULE_FILE, schedule)
        print(f"Successfully wrote {len(schedule['events'])} events to {SCHEDULE_FILE}")
    else:
        print(f"Failed to transform schedule data. {SCHEDULE_FILE} not updated.")

    if standings:
        _write_json(STANDINGS_FILE, standings)
    else:
        print(f"Failed to fetch standings data. {STANDINGS_FILE} not updated.")