# spothole/solarconditionsproviders/noaa3dayforecast.py

import logging
import re
from datetime import datetime, timezone

from solarconditionsproviders.http_solar_conditions_provider import HTTPSolarConditionsProvider

# Polling interval handed to the HTTPSolarConditionsProvider constructor (presumably in seconds; 3600 = 1 hour).
POLL_INTERVAL = 3600  # 1 hour
# Address of the NOAA SWPC 3-day forecast plain-text product that this provider fetches and parses.
URL = "https://services.swpc.noaa.gov/text/3-day-forecast.txt"


class NOAA3dayForecast(HTTPSolarConditionsProvider):
    """Solar conditions provider using the NOAA 3-day forecast text file.

    Parses the "NOAA Kp index breakdown" table of the forecast and returns the K-index forecast values it contains,
    keyed by the start time of each forecast period."""

    def __init__(self, provider_config):
        super().__init__(provider_config, URL, POLL_INTERVAL)

    def _http_response_to_solar_conditions(self, http_response):
        """Convert the HTTP response for the forecast text file into solar conditions data.

        :param http_response: Response object exposing `status_code` and `text`.
        :return: A dict `{"k_index_forecast": {start_timestamp: kp}}`, where each key is the POSIX timestamp (UTC)
            of the start of a forecast period and each value is the forecast Kp as a float. Returns None (after
            logging a warning) if the response is not HTTP 200 or the table cannot be found or parsed.
        """
        if http_response.status_code != 200:
            logging.warning("NOAA K-index forecast API returned HTTP " + str(http_response.status_code))
            return None

        lines = http_response.text.splitlines()

        # Find the "NOAA Kp index breakdown" section header
        start_idx = next((i for i, line in enumerate(lines) if "NOAA Kp index breakdown" in line), None)
        if start_idx is None:
            logging.warning("NOAA K-index forecast: could not find 'NOAA Kp index breakdown' section")
            return None

        # Extract the year from the header line, e.g. "NOAA Kp index breakdown Apr 2-Apr 4, 2026"
        header_line = lines[start_idx]
        year_match = re.search(r'\b(\d{4})\b', header_line)
        if not year_match:
            logging.warning("NOAA K-index forecast: could not extract year from: " + header_line)
            return None
        year = int(year_match.group(1))

        # The column date headers, e.g. "              Apr 02     Apr 03     Apr 04", are read from two lines below
        # the section header (the line in between is skipped). The bounds check must therefore cover that index,
        # otherwise a truncated file raises IndexError instead of being reported.
        date_header_idx = start_idx + 2
        if date_header_idx >= len(lines):
            logging.warning("NOAA K-index forecast: missing date header line")
            return None

        date_header_line = lines[date_header_idx]
        date_matches = re.findall(r'([A-Za-z]{3})\s+(\d{2})', date_header_line)
        if not date_matches:
            logging.warning("NOAA K-index forecast: could not parse date headers from: " + date_header_line)
            return None

        # NOTE(review): every column is given the single year taken from the section header, so a forecast that
        # spans a year change would place some columns in the wrong year - confirm how NOAA formats that case.
        column_dates = []
        for month_str, day_str in date_matches:
            try:
                column_dates.append(datetime.strptime(f"{day_str} {month_str} {year}", "%d %b %Y").date())
            except ValueError:
                logging.warning(f"NOAA K-index forecast: could not parse date: {month_str} {day_str} {year}")
                return None

        # Parse each data row, e.g. "00-03UT          2.00          3.00          2.00"
        row_pattern = re.compile(r'^(\d{2})-(\d{2})UT\s+(.*)')
        k_index_forecast = {}
        for line in lines[date_header_idx + 1:]:
            time_match = row_pattern.match(line.strip())
            if not time_match:
                # Lines before the first data row are skipped; the first non-row line after it ends the table
                if k_index_forecast:
                    break
                continue

            start_hour = int(time_match.group(1))

            # Bracketed tokens such as "(G1)" annotate the preceding value and do not occupy a column of their
            # own, so they are removed *before* values are paired with column dates. Skipping them while still
            # counting their position would shift every later value onto the wrong date.
            values = [
                token for token in time_match.group(3).split()
                if not (token.startswith('(') and token.endswith(')'))
            ]

            # zip() stops at the shorter sequence, so surplus values beyond the known columns are ignored
            for date, val in zip(column_dates, values):
                try:
                    kp = float(val)
                except ValueError:
                    # An unparseable value still occupies its column; just leave that slot out of the result
                    continue

                start_dt = datetime(date.year, date.month, date.day, start_hour, 0, 0, tzinfo=timezone.utc)

                # Key the data dict by start time
                k_index_forecast[start_dt.timestamp()] = kp

        if not k_index_forecast:
            logging.warning("NOAA K-index forecast: no data rows parsed")
            return None

        return {"k_index_forecast": k_index_forecast}