Attempt to fix CPU utilisation bug by preventing the heartbeat callback leak in the SSE stream handlers and replacing Timer-based with Event-based threads. Also compiled regexes in advance for DXCC callsign lookups for efficiency, and fixed my misunderstanding of what Queue.empty() does

This commit is contained in:
Ian Renton
2026-02-27 08:28:43 +00:00
parent e6c9bb1853
commit 068c732796
19 changed files with 107 additions and 94 deletions

View File

@@ -1,7 +1,6 @@
import logging import logging
from datetime import datetime from datetime import datetime
from threading import Timer, Thread from threading import Thread, Event
from time import sleep
import pytz import pytz
import requests import requests
@@ -18,22 +17,25 @@ class HTTPAlertProvider(AlertProvider):
super().__init__(provider_config) super().__init__(provider_config)
self.url = url self.url = url
self.poll_interval = poll_interval self.poll_interval = poll_interval
self.poll_timer = None self._stop_event = Event()
def start(self): def start(self):
# Fire off a one-shot thread to run poll() for the first time, just to ensure start() returns immediately and # Fire off the polling thread. It will poll immediately on startup, then sleep for poll_interval between
# the application can continue starting. The thread itself will then die, and the timer will kick in on its own # subsequent polls, so start() returns immediately and the application can continue starting.
# thread.
logging.info("Set up query of " + self.name + " alert API every " + str(self.poll_interval) + " seconds.") logging.info("Set up query of " + self.name + " alert API every " + str(self.poll_interval) + " seconds.")
thread = Thread(target=self.poll) self._thread = Thread(target=self._run, daemon=True)
thread.daemon = True self._thread.start()
thread.start()
def stop(self): def stop(self):
if self.poll_timer: self._stop_event.set()
self.poll_timer.cancel()
def poll(self): def _run(self):
while True:
self._poll()
if self._stop_event.wait(timeout=self.poll_interval):
break
def _poll(self):
try: try:
# Request data from API # Request data from API
logging.debug("Polling " + self.name + " alert API...") logging.debug("Polling " + self.name + " alert API...")
@@ -51,10 +53,8 @@ class HTTPAlertProvider(AlertProvider):
except Exception as e: except Exception as e:
self.status = "Error" self.status = "Error"
logging.exception("Exception in HTTP JSON Alert Provider (" + self.name + ")") logging.exception("Exception in HTTP JSON Alert Provider (" + self.name + ")")
sleep(1) # Brief pause on error before the next poll, but still respond promptly to stop()
self._stop_event.wait(timeout=1)
self.poll_timer = Timer(self.poll_interval, self.poll)
self.poll_timer.start()
# Convert an HTTP response returned by the API into alert data. The whole response is provided here so the subclass # Convert an HTTP response returned by the API into alert data. The whole response is provided here so the subclass
# implementations can check for HTTP status codes if necessary, and handle the response as JSON, XML, text, whatever # implementations can check for HTTP status codes if necessary, and handle the response as JSON, XML, text, whatever

View File

@@ -1,6 +1,6 @@
import logging import logging
from datetime import datetime from datetime import datetime
from threading import Timer from threading import Timer, Event, Thread
from time import sleep from time import sleep
import pytz import pytz
@@ -18,17 +18,23 @@ class CleanupTimer:
self.cleanup_timer = None self.cleanup_timer = None
self.last_cleanup_time = datetime.min.replace(tzinfo=pytz.UTC) self.last_cleanup_time = datetime.min.replace(tzinfo=pytz.UTC)
self.status = "Starting" self.status = "Starting"
self._stop_event = Event()
# Start the cleanup timer # Start the cleanup timer
def start(self): def start(self):
self.cleanup() self._thread = Thread(target=self._run, daemon=True)
self._thread.start()
# Stop any threads and prepare for application shutdown # Stop any threads and prepare for application shutdown
def stop(self): def stop(self):
self.cleanup_timer.cancel() self._stop_event.set()
def _run(self):
while not self._stop_event.wait(timeout=self.cleanup_interval):
self._cleanup()
# Perform cleanup and reschedule next timer # Perform cleanup and reschedule next timer
def cleanup(self): def _cleanup(self):
try: try:
# Perform cleanup via letting the data expire # Perform cleanup via letting the data expire
self.spots.expire() self.spots.expire()
@@ -61,7 +67,4 @@ class CleanupTimer:
except Exception as e: except Exception as e:
self.status = "Error" self.status = "Error"
logging.exception("Exception in Cleanup thread") logging.exception("Exception in Cleanup thread")
sleep(1) self._stop_event.wait(timeout=1)
self.cleanup_timer = Timer(self.cleanup_interval, self.cleanup)
self.cleanup_timer.start()

View File

@@ -101,6 +101,10 @@ class LookupHelper:
else: else:
logging.error("Could not download DXCC data, flags and similar data may be missing!") logging.error("Could not download DXCC data, flags and similar data may be missing!")
# Precompile regex matches for DXCCs to improve efficiency when iterating through them
for dxcc in self.DXCC_DATA.values():
dxcc["_prefixRegexCompiled"] = re.compile(dxcc["prefixRegex"])
# Download the cty.plist file from country-files.com on first startup. The pyhamtools lib can actually download and use # Download the cty.plist file from country-files.com on first startup. The pyhamtools lib can actually download and use
# this itself, but it's occasionally offline which causes it to throw an error. By downloading it separately, we can # this itself, but it's occasionally offline which causes it to throw an error. By downloading it separately, we can
# catch errors and handle them, falling back to a previous copy of the file in the cache, and we can use the # catch errors and handle them, falling back to a previous copy of the file in the cache, and we can use the
@@ -559,7 +563,7 @@ class LookupHelper:
# Utility method to get generic DXCC data from our lookup table, if we can find it # Utility method to get generic DXCC data from our lookup table, if we can find it
def get_dxcc_data_for_callsign(self, call): def get_dxcc_data_for_callsign(self, call):
for entry in self.DXCC_DATA.values(): for entry in self.DXCC_DATA.values():
if re.match(entry["prefixRegex"], call): if entry["_prefixRegexCompiled"].match(call):
return entry return entry
return None return None

View File

@@ -1,6 +1,6 @@
import os import os
from datetime import datetime from datetime import datetime
from threading import Timer from threading import Thread, Event
import psutil import psutil
import pytz import pytz
@@ -24,22 +24,30 @@ class StatusReporter:
self.spot_providers = spot_providers self.spot_providers = spot_providers
self.alerts = alerts self.alerts = alerts
self.alert_providers = alert_providers self.alert_providers = alert_providers
self.run_timer = None self._stop_event = Event()
self.startup_time = datetime.now(pytz.UTC) self.startup_time = datetime.now(pytz.UTC)
self.status_data["software-version"] = SOFTWARE_VERSION self.status_data["software-version"] = SOFTWARE_VERSION
self.status_data["server-owner-callsign"] = SERVER_OWNER_CALLSIGN self.status_data["server-owner-callsign"] = SERVER_OWNER_CALLSIGN
# Start the cleanup timer # Start the reporter thread
def start(self): def start(self):
self.run() self._thread = Thread(target=self._run, daemon=True)
self._thread.start()
# Stop any threads and prepare for application shutdown # Stop any threads and prepare for application shutdown
def stop(self): def stop(self):
self.run_timer.cancel() self._stop_event.set()
# Write status information and reschedule next timer # Thread entry point: report immediately on startup, then on each interval until stopped
def run(self): def _run(self):
while True:
self._report()
if self._stop_event.wait(timeout=self.run_interval):
break
# Write status information
def _report(self):
self.status_data["uptime"] = (datetime.now(pytz.UTC) - self.startup_time).total_seconds() self.status_data["uptime"] = (datetime.now(pytz.UTC) - self.startup_time).total_seconds()
self.status_data["mem_use_mb"] = round(psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024), 3) self.status_data["mem_use_mb"] = round(psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024), 3)
self.status_data["num_spots"] = len(self.spots) self.status_data["num_spots"] = len(self.spots)
@@ -73,7 +81,4 @@ class StatusReporter:
# Update Prometheus metrics # Update Prometheus metrics
memory_use_gauge.set(psutil.Process(os.getpid()).memory_info().rss * 1024) memory_use_gauge.set(psutil.Process(os.getpid()).memory_info().rss * 1024)
spots_gauge.set(len(self.spots)) spots_gauge.set(len(self.spots))
alerts_gauge.set(len(self.alerts)) alerts_gauge.set(len(self.alerts))
self.run_timer = Timer(self.run_interval, self.run)
self.run_timer.start()

View File

@@ -3,3 +3,12 @@
# to receive spots without complex handling. # to receive spots without complex handling.
def serialize_everything(obj): def serialize_everything(obj):
return obj.__dict__ return obj.__dict__
# Empty a queue
def empty_queue(q):
while not q.empty():
try:
q.get_nowait()
except:
break

View File

@@ -8,7 +8,7 @@ import tornado
import tornado_eventsource.handler import tornado_eventsource.handler
from core.prometheus_metrics_handler import api_requests_counter from core.prometheus_metrics_handler import api_requests_counter
from core.utils import serialize_everything from core.utils import serialize_everything, empty_queue
SSE_HANDLER_MAX_QUEUE_SIZE = 100 SSE_HANDLER_MAX_QUEUE_SIZE = 100
SSE_HANDLER_QUEUE_CHECK_INTERVAL = 5000 SSE_HANDLER_QUEUE_CHECK_INTERVAL = 5000
@@ -86,7 +86,11 @@ class APIAlertsStreamHandler(tornado_eventsource.handler.EventSourceHandler):
try: try:
if self.alert_queue in self.sse_alert_queues: if self.alert_queue in self.sse_alert_queues:
self.sse_alert_queues.remove(self.alert_queue) self.sse_alert_queues.remove(self.alert_queue)
self.alert_queue.empty() empty_queue(self.alert_queue)
except:
pass
try:
self.heartbeat.stop()
except: except:
pass pass
self.alert_queue = None self.alert_queue = None

View File

@@ -8,7 +8,7 @@ import tornado
import tornado_eventsource.handler import tornado_eventsource.handler
from core.prometheus_metrics_handler import api_requests_counter from core.prometheus_metrics_handler import api_requests_counter
from core.utils import serialize_everything from core.utils import serialize_everything, empty_queue
SSE_HANDLER_MAX_QUEUE_SIZE = 1000 SSE_HANDLER_MAX_QUEUE_SIZE = 1000
SSE_HANDLER_QUEUE_CHECK_INTERVAL = 5000 SSE_HANDLER_QUEUE_CHECK_INTERVAL = 5000
@@ -88,7 +88,11 @@ class APISpotsStreamHandler(tornado_eventsource.handler.EventSourceHandler):
try: try:
if self.spot_queue in self.sse_spot_queues: if self.spot_queue in self.sse_spot_queues:
self.sse_spot_queues.remove(self.spot_queue) self.sse_spot_queues.remove(self.spot_queue)
self.spot_queue.empty() empty_queue(self.spot_queue)
except:
pass
try:
self.heartbeat.stop()
except: except:
pass pass
self.spot_queue = None self.spot_queue = None

View File

@@ -5,6 +5,7 @@ import os
import tornado import tornado
from tornado.web import StaticFileHandler from tornado.web import StaticFileHandler
from core.utils import empty_queue
from server.handlers.api.addspot import APISpotHandler from server.handlers.api.addspot import APISpotHandler
from server.handlers.api.alerts import APIAlertsHandler, APIAlertsStreamHandler from server.handlers.api.alerts import APIAlertsHandler, APIAlertsStreamHandler
from server.handlers.api.lookups import APILookupCallHandler, APILookupSIGRefHandler, APILookupGridHandler from server.handlers.api.lookups import APILookupCallHandler, APILookupSIGRefHandler, APILookupGridHandler
@@ -105,7 +106,7 @@ class WebServer:
if q.full(): if q.full():
logging.warn("A full SSE spot queue was found, presumably because the client disconnected strangely. It has been removed.") logging.warn("A full SSE spot queue was found, presumably because the client disconnected strangely. It has been removed.")
self.sse_spot_queues.remove(q) self.sse_spot_queues.remove(q)
q.empty() empty_queue(q)
except: except:
# Probably got deleted already on another thread # Probably got deleted already on another thread
pass pass
@@ -114,7 +115,7 @@ class WebServer:
if q.full(): if q.full():
logging.warn("A full SSE alert queue was found, presumably because the client disconnected strangely. It has been removed.") logging.warn("A full SSE alert queue was found, presumably because the client disconnected strangely. It has been removed.")
self.sse_alert_queues.remove(q) self.sse_alert_queues.remove(q)
q.empty() empty_queue(q)
except: except:
# Probably got deleted already on another thread # Probably got deleted already on another thread
pass pass

View File

@@ -1,7 +1,6 @@
import logging import logging
from datetime import datetime from datetime import datetime
from threading import Timer, Thread from threading import Thread, Event
from time import sleep
import pytz import pytz
import requests import requests
@@ -18,22 +17,25 @@ class HTTPSpotProvider(SpotProvider):
super().__init__(provider_config) super().__init__(provider_config)
self.url = url self.url = url
self.poll_interval = poll_interval self.poll_interval = poll_interval
self.poll_timer = None self._stop_event = Event()
def start(self): def start(self):
# Fire off a one-shot thread to run poll() for the first time, just to ensure start() returns immediately and # Fire off the polling thread. It will poll immediately on startup, then sleep for poll_interval between
# the application can continue starting. The thread itself will then die, and the timer will kick in on its own # subsequent polls, so start() returns immediately and the application can continue starting.
# thread.
logging.info("Set up query of " + self.name + " spot API every " + str(self.poll_interval) + " seconds.") logging.info("Set up query of " + self.name + " spot API every " + str(self.poll_interval) + " seconds.")
thread = Thread(target=self.poll) self._thread = Thread(target=self._run, daemon=True)
thread.daemon = True self._thread.start()
thread.start()
def stop(self): def stop(self):
if self.poll_timer: self._stop_event.set()
self.poll_timer.cancel()
def poll(self): def _run(self):
while True:
self._poll()
if self._stop_event.wait(timeout=self.poll_interval):
break
def _poll(self):
try: try:
# Request data from API # Request data from API
logging.debug("Polling " + self.name + " spot API...") logging.debug("Polling " + self.name + " spot API...")
@@ -51,10 +53,7 @@ class HTTPSpotProvider(SpotProvider):
except Exception as e: except Exception as e:
self.status = "Error" self.status = "Error"
logging.exception("Exception in HTTP JSON Spot Provider (" + self.name + ")") logging.exception("Exception in HTTP JSON Spot Provider (" + self.name + ")")
sleep(1) self._stop_event.wait(timeout=1)
self.poll_timer = Timer(self.poll_interval, self.poll)
self.poll_timer.start()
# Convert an HTTP response returned by the API into spot data. The whole response is provided here so the subclass # Convert an HTTP response returned by the API into spot data. The whole response is provided here so the subclass
# implementations can check for HTTP status codes if necessary, and handle the response as JSON, XML, text, whatever # implementations can check for HTTP status codes if necessary, and handle the response as JSON, XML, text, whatever

View File

@@ -66,7 +66,7 @@
<p>This software is dedicated to the memory of Tom G1PJB, SK, a friend and colleague who sadly passed away around the time I started writing it in Autumn 2025. I was looking forward to showing it to you when it was done.</p> <p>This software is dedicated to the memory of Tom G1PJB, SK, a friend and colleague who sadly passed away around the time I started writing it in Autumn 2025. I was looking forward to showing it to you when it was done.</p>
</div> </div>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-about").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-about").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -69,8 +69,8 @@
</div> </div>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/add-spot.js?v=8"></script> <script src="/js/add-spot.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-add-spot").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-add-spot").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -56,8 +56,8 @@
</div> </div>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/alerts.js?v=8"></script> <script src="/js/alerts.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-alerts").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-alerts").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -62,9 +62,9 @@
<script> <script>
let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %}; let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %};
</script> </script>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/spotsbandsandmap.js?v=8"></script> <script src="/js/spotsbandsandmap.js?v=1772180923"></script>
<script src="/js/bands.js?v=8"></script> <script src="/js/bands.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-bands").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-bands").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -46,10 +46,10 @@
crossorigin="anonymous"></script> crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/tinycolor2@1.6.0/cjs/tinycolor.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/tinycolor2@1.6.0/cjs/tinycolor.min.js"></script>
<script src="https://misc.ianrenton.com/jsutils/utils.js?v=8"></script> <script src="https://misc.ianrenton.com/jsutils/utils.js?v=1772180923"></script>
<script src="https://misc.ianrenton.com/jsutils/storage.js?v=8"></script> <script src="https://misc.ianrenton.com/jsutils/storage.js?v=1772180923"></script>
<script src="https://misc.ianrenton.com/jsutils/ui-ham.js?v=8"></script> <script src="https://misc.ianrenton.com/jsutils/ui-ham.js?v=1772180923"></script>
<script src="https://misc.ianrenton.com/jsutils/geo.js?v=8"></script> <script src="https://misc.ianrenton.com/jsutils/geo.js?v=1772180923"></script>
</head> </head>
<body> <body>

View File

@@ -70,9 +70,9 @@
<script> <script>
let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %}; let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %};
</script> </script>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/spotsbandsandmap.js?v=8"></script> <script src="/js/spotsbandsandmap.js?v=1772180923"></script>
<script src="/js/map.js?v=8"></script> <script src="/js/map.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-map").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-map").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -87,9 +87,9 @@
<script> <script>
let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %}; let spotProvidersEnabledByDefault = {% raw json_encode(web_ui_options["spot-providers-enabled-by-default"]) %};
</script> </script>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/spotsbandsandmap.js?v=8"></script> <script src="/js/spotsbandsandmap.js?v=1772180923"></script>
<script src="/js/spots.js?v=8"></script> <script src="/js/spots.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-spots").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-spots").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

View File

@@ -3,8 +3,8 @@
<div id="status-container" class="row row-cols-1 row-cols-md-4 g-4 mt-4"></div> <div id="status-container" class="row row-cols-1 row-cols-md-4 g-4 mt-4"></div>
<script src="/js/common.js?v=8"></script> <script src="/js/common.js?v=1772180923"></script>
<script src="/js/status.js?v=8"></script> <script src="/js/status.js?v=1772180923"></script>
<script>$(document).ready(function() { $("#nav-link-status").addClass("active"); }); <!-- highlight active page in nav --></script> <script>$(document).ready(function() { $("#nav-link-status").addClass("active"); }); <!-- highlight active page in nav --></script>
{% end %} {% end %}

10
webassets/.idea/.gitignore generated vendored
View File

@@ -1,10 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Ignored default folder with query files
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>