Update Docker configuration, enhance error handling, and improve logging

- Added a health check to the camera management API service in docker-compose.yml for better container reliability.
- Updated the installation scripts in the Dockerfile to check for existing dependencies before installing them, improving efficiency.
- Enhanced error handling in the USDAVisionSystem class to allow partial operation if some components fail to start; the system now exits at startup only when critical components fail, instead of shutting down immediately.
- Improved logging throughout the application, including more detailed error messages (with tracebacks) and handling of critical errors in the main loop.
- Refactored WebSocketManager and CameraMonitor classes to use debug logging for connection events, reducing log noise.
This commit is contained in:
salirezav
2025-12-03 17:23:31 -05:00
parent 2bce817b4e
commit 933d4417a5
30 changed files with 4314 additions and 220 deletions

View File

@@ -208,23 +208,50 @@ class USDAVisionSystem:
def run(self) -> None:
"""Run the system (blocking call)"""
if not self.start():
self.logger.error("Failed to start system")
return
self.logger.error("Failed to start system - some components may not be available")
# Don't exit immediately - allow partial operation if some components started
# Only exit if critical components failed
if not self.running:
self.logger.critical("Critical components failed to start - exiting")
return
try:
self.logger.info("System running... Press Ctrl+C to stop")
# Main loop - just keep the system alive
consecutive_errors = 0
max_consecutive_errors = 10
while self.running:
time.sleep(1)
try:
time.sleep(1)
consecutive_errors = 0 # Reset on successful iteration
# Periodic maintenance tasks could go here
# For example: cleanup old recordings, health checks, etc.
# Periodic maintenance tasks could go here
# For example: cleanup old recordings, health checks, etc.
# Health check: verify critical components are still running
if not self.mqtt_client.is_running():
self.logger.warning("MQTT client stopped running - attempting restart")
try:
self.mqtt_client.start()
except Exception as e:
self.logger.error(f"Failed to restart MQTT client: {e}")
consecutive_errors += 1
except Exception as e:
consecutive_errors += 1
self.logger.error(f"Error in main loop (consecutive: {consecutive_errors}): {e}", exc_info=True)
# If too many consecutive errors, exit to prevent infinite crash loop
if consecutive_errors >= max_consecutive_errors:
self.logger.critical(f"Too many consecutive errors ({consecutive_errors}) - shutting down to prevent crash loop")
break
except KeyboardInterrupt:
self.logger.info("Keyboard interrupt received")
except Exception as e:
self.logger.error(f"Unexpected error in main loop: {e}")
self.logger.error(f"Unexpected error in main loop: {e}", exc_info=True)
finally:
self.stop()
@@ -270,8 +297,14 @@ def main():
try:
system.run()
except KeyboardInterrupt:
logging.info("Interrupted by user")
sys.exit(0)
except Exception as e:
logging.error(f"Fatal error: {e}")
logging.critical(f"Fatal error: {e}", exc_info=True)
# Give a moment for logs to flush
import time
time.sleep(1)
sys.exit(1)