Update Docker configuration, enhance error handling, and improve logging
- Added health check to the camera management API service in docker-compose.yml for better container reliability. - Updated installation scripts in Dockerfile to check for existing dependencies before installation, improving efficiency. - Enhanced error handling in the USDAVisionSystem class to allow partial operation if some components fail to start, preventing immediate shutdown. - Improved logging throughout the application, including more detailed error messages and critical error handling in the main loop. - Refactored WebSocketManager and CameraMonitor classes to use debug logging for connection events, reducing log noise.
This commit is contained in:
@@ -44,12 +44,12 @@ class WebSocketManager:
|
||||
async def connect(self, websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
self.active_connections.append(websocket)
|
||||
self.logger.info(f"WebSocket connected. Total connections: {len(self.active_connections)}")
|
||||
self.logger.debug(f"WebSocket connected. Total connections: {len(self.active_connections)}")
|
||||
|
||||
def disconnect(self, websocket: WebSocket):
|
||||
if websocket in self.active_connections:
|
||||
self.active_connections.remove(websocket)
|
||||
self.logger.info(f"WebSocket disconnected. Total connections: {len(self.active_connections)}")
|
||||
self.logger.debug(f"WebSocket disconnected. Total connections: {len(self.active_connections)}")
|
||||
|
||||
async def send_personal_message(self, message: dict, websocket: WebSocket):
|
||||
try:
|
||||
@@ -300,9 +300,11 @@ class APIServer:
|
||||
asyncio.set_event_loop(self._event_loop)
|
||||
|
||||
# Map our log level to uvicorn's log level
|
||||
# Use "warning" for uvicorn to reduce noise (connection open/close messages)
|
||||
# Application logs will still use the configured log level
|
||||
uvicorn_log_level_map = {
|
||||
"DEBUG": "debug",
|
||||
"INFO": "info",
|
||||
"DEBUG": "warning", # Suppress uvicorn DEBUG logs even if app is in DEBUG mode
|
||||
"INFO": "warning", # Suppress uvicorn INFO logs (connection messages)
|
||||
"WARNING": "warning",
|
||||
"ERROR": "error",
|
||||
"CRITICAL": "critical"
|
||||
|
||||
@@ -151,42 +151,42 @@ class CameraMonitor:
|
||||
|
||||
# ALWAYS check our streamer state first, before doing any camera availability tests
|
||||
streamer = self.camera_manager.camera_streamers.get(camera_name)
|
||||
self.logger.info(f"Checking streamer for {camera_name}: {streamer}")
|
||||
self.logger.debug(f"Checking streamer for {camera_name}: {streamer}")
|
||||
if streamer and streamer.is_streaming():
|
||||
self.logger.info(f"Camera {camera_name} is streaming - setting status to streaming")
|
||||
self.logger.debug(f"Camera {camera_name} is streaming - setting status to streaming")
|
||||
return "streaming", "Camera streaming (live preview)", self._get_device_info_dict(device_info)
|
||||
|
||||
# Also check if our recorder is active
|
||||
recorder = self.camera_manager.camera_recorders.get(camera_name)
|
||||
if recorder and recorder.hCamera and recorder.recording:
|
||||
self.logger.info(f"Camera {camera_name} is recording - setting status to available")
|
||||
self.logger.debug(f"Camera {camera_name} is recording - setting status to available")
|
||||
return "available", "Camera recording (in use by system)", self._get_device_info_dict(device_info)
|
||||
|
||||
# Check if camera is already opened by another process
|
||||
try:
|
||||
self.logger.info(f"Checking if camera {camera_name} is opened...")
|
||||
self.logger.debug(f"Checking if camera {camera_name} is opened...")
|
||||
is_opened = mvsdk.CameraIsOpened(device_info)
|
||||
self.logger.info(f"CameraIsOpened result for {camera_name}: {is_opened}")
|
||||
self.logger.debug(f"CameraIsOpened result for {camera_name}: {is_opened}")
|
||||
|
||||
if is_opened:
|
||||
self.logger.info(f"Camera {camera_name} is opened by another process - setting status to busy")
|
||||
self.logger.debug(f"Camera {camera_name} is opened by another process - setting status to busy")
|
||||
return "busy", "Camera opened by another process", self._get_device_info_dict(device_info)
|
||||
else:
|
||||
self.logger.info(f"Camera {camera_name} is not opened, will try initialization")
|
||||
self.logger.debug(f"Camera {camera_name} is not opened, will try initialization")
|
||||
# Camera is not opened, so we can try to initialize it
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"CameraIsOpened failed for {camera_name}: {e}")
|
||||
# If we can't determine the status, try to initialize to see what happens
|
||||
self.logger.info(f"CameraIsOpened failed for {camera_name}, will try initialization: {e}")
|
||||
self.logger.debug(f"CameraIsOpened failed for {camera_name}, will try initialization: {e}")
|
||||
|
||||
# Try to initialize camera briefly to test availability
|
||||
try:
|
||||
# Ensure SDK is initialized
|
||||
ensure_sdk_initialized()
|
||||
|
||||
self.logger.info(f"Attempting to initialize camera {camera_name} for availability test...")
|
||||
self.logger.debug(f"Attempting to initialize camera {camera_name} for availability test...")
|
||||
|
||||
# Check if camera is already in use by recorder or streamer before trying to initialize
|
||||
recorder = self.camera_manager.camera_recorders.get(camera_name) if self.camera_manager else None
|
||||
@@ -198,7 +198,7 @@ class CameraMonitor:
|
||||
# Check if recorder has camera open
|
||||
if mvsdk.CameraIsOpened(recorder.hCamera):
|
||||
camera_in_use = True
|
||||
self.logger.info(f"Camera {camera_name} is already in use by recorder (handle: {recorder.hCamera})")
|
||||
self.logger.debug(f"Camera {camera_name} is already in use by recorder (handle: {recorder.hCamera})")
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -207,13 +207,13 @@ class CameraMonitor:
|
||||
# Check if streamer has camera open
|
||||
if mvsdk.CameraIsOpened(streamer.hCamera):
|
||||
camera_in_use = True
|
||||
self.logger.info(f"Camera {camera_name} is already in use by streamer (handle: {streamer.hCamera})")
|
||||
self.logger.debug(f"Camera {camera_name} is already in use by streamer (handle: {streamer.hCamera})")
|
||||
except:
|
||||
pass
|
||||
|
||||
# If camera is already in use, mark as available (since it's working, just occupied)
|
||||
if camera_in_use:
|
||||
self.logger.info(f"Camera {camera_name} is in use by system components - marking as available")
|
||||
self.logger.debug(f"Camera {camera_name} is in use by system components - marking as available")
|
||||
return "available", "Camera is in use by system", self._get_device_info_dict(device_info)
|
||||
|
||||
# Suppress output to avoid MVCAMAPI error messages during camera testing
|
||||
@@ -221,7 +221,7 @@ class CameraMonitor:
|
||||
try:
|
||||
with suppress_camera_errors():
|
||||
hCamera = mvsdk.CameraInit(device_info, -1, -1)
|
||||
self.logger.info(f"Camera {camera_name} initialized successfully, starting test capture...")
|
||||
self.logger.debug(f"Camera {camera_name} initialized successfully, starting test capture...")
|
||||
except mvsdk.CameraException as init_e:
|
||||
error_msg = f"CameraInit failed for {camera_name}: {init_e.message} (error_code: {init_e.error_code})"
|
||||
|
||||
@@ -256,14 +256,14 @@ class CameraMonitor:
|
||||
mvsdk.CameraSetTriggerMode(hCamera, 0)
|
||||
mvsdk.CameraPlay(hCamera)
|
||||
|
||||
self.logger.info(f"Camera {camera_name} test: Attempting to capture frame with {CAMERA_TEST_CAPTURE_TIMEOUT}ms timeout...")
|
||||
self.logger.debug(f"Camera {camera_name} test: Attempting to capture frame with {CAMERA_TEST_CAPTURE_TIMEOUT}ms timeout...")
|
||||
# Try to capture with short timeout
|
||||
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(hCamera, CAMERA_TEST_CAPTURE_TIMEOUT)
|
||||
mvsdk.CameraReleaseImageBuffer(hCamera, pRawData)
|
||||
|
||||
# Success - camera is available
|
||||
mvsdk.CameraUnInit(hCamera)
|
||||
self.logger.info(f"Camera {camera_name} test successful - camera is available")
|
||||
self.logger.debug(f"Camera {camera_name} test successful - camera is available")
|
||||
return "available", "Camera test successful", self._get_device_info_dict(device_info)
|
||||
|
||||
except mvsdk.CameraException as capture_e:
|
||||
|
||||
@@ -90,8 +90,20 @@ class CameraStreamer:
|
||||
"""Start streaming preview frames"""
|
||||
with self._lock:
|
||||
if self.streaming:
|
||||
self.logger.warning("Streaming already active")
|
||||
return True
|
||||
self.logger.warning("Streaming already active - checking if thread is alive")
|
||||
# Check if thread is actually running
|
||||
if self._streaming_thread and self._streaming_thread.is_alive():
|
||||
self.logger.info("Streaming thread is alive, returning success")
|
||||
return True
|
||||
else:
|
||||
# Thread died but flag wasn't reset - clean up and restart
|
||||
self.logger.warning("Streaming flag set but thread is dead - cleaning up and restarting")
|
||||
self.streaming = False
|
||||
if self.hCamera is not None:
|
||||
try:
|
||||
self._cleanup_camera()
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error cleaning up stale camera handle: {e}")
|
||||
|
||||
try:
|
||||
# Initialize camera for streaming
|
||||
@@ -249,6 +261,15 @@ class CameraStreamer:
|
||||
try:
|
||||
self.logger.info(f"Initializing camera for streaming: {self.camera_config.name}")
|
||||
|
||||
# Safety check: ensure no stale camera handle exists
|
||||
if self.hCamera is not None and not self._using_shared_camera:
|
||||
self.logger.warning("Stale camera handle detected during initialization - cleaning up first")
|
||||
try:
|
||||
mvsdk.CameraUnInit(self.hCamera)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error cleaning up stale handle: {e}")
|
||||
self.hCamera = None
|
||||
|
||||
# Check if recorder is active and has camera open - if so, share it
|
||||
if self.recorder and self.recorder.hCamera and self.recorder.recording:
|
||||
self.logger.info("Recorder is active with camera open - will share recorder's camera connection")
|
||||
@@ -423,11 +444,41 @@ class CameraStreamer:
|
||||
time.sleep(STREAMING_LOOP_SLEEP) # Just wait, recorder populates queues
|
||||
continue
|
||||
|
||||
# Safety check: ensure camera handle is valid before use
|
||||
if self.hCamera is None:
|
||||
self.logger.error("Camera handle is None in streaming loop - stopping")
|
||||
break
|
||||
|
||||
# Capture frame with timeout
|
||||
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(self.hCamera, CAMERA_GET_BUFFER_TIMEOUT)
|
||||
try:
|
||||
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(self.hCamera, CAMERA_GET_BUFFER_TIMEOUT)
|
||||
except mvsdk.CameraException as e:
|
||||
if e.error_code == mvsdk.CAMERA_STATUS_TIME_OUT:
|
||||
continue # Timeout is normal, continue
|
||||
else:
|
||||
self.logger.error(f"CameraGetImageBuffer failed: {e.message} (error_code: {e.error_code})")
|
||||
# If camera is invalid, break to prevent segfault
|
||||
if e.error_code in [mvsdk.CAMERA_STATUS_INVALID_HANDLE, mvsdk.CAMERA_STATUS_INVALID_PARAM]:
|
||||
self.logger.error("Invalid camera handle detected - stopping streaming loop")
|
||||
break
|
||||
time.sleep(BRIEF_PAUSE_SLEEP)
|
||||
continue
|
||||
|
||||
# Process frame
|
||||
mvsdk.CameraImageProcess(self.hCamera, pRawData, self.frame_buffer, FrameHead)
|
||||
try:
|
||||
mvsdk.CameraImageProcess(self.hCamera, pRawData, self.frame_buffer, FrameHead)
|
||||
except mvsdk.CameraException as e:
|
||||
self.logger.error(f"CameraImageProcess failed: {e.message} (error_code: {e.error_code})")
|
||||
# Release buffer before continuing
|
||||
try:
|
||||
mvsdk.CameraReleaseImageBuffer(self.hCamera, pRawData)
|
||||
except:
|
||||
pass
|
||||
if e.error_code in [mvsdk.CAMERA_STATUS_INVALID_HANDLE, mvsdk.CAMERA_STATUS_INVALID_PARAM]:
|
||||
self.logger.error("Invalid camera handle in image process - stopping streaming loop")
|
||||
break
|
||||
time.sleep(BRIEF_PAUSE_SLEEP)
|
||||
continue
|
||||
|
||||
# Convert to OpenCV format
|
||||
frame = self._convert_frame_to_opencv(FrameHead)
|
||||
@@ -477,7 +528,14 @@ class CameraStreamer:
|
||||
pass
|
||||
|
||||
# Release buffer
|
||||
mvsdk.CameraReleaseImageBuffer(self.hCamera, pRawData)
|
||||
try:
|
||||
mvsdk.CameraReleaseImageBuffer(self.hCamera, pRawData)
|
||||
except mvsdk.CameraException as e:
|
||||
self.logger.error(f"CameraReleaseImageBuffer failed: {e.message} (error_code: {e.error_code})")
|
||||
# If handle is invalid, break to prevent further issues
|
||||
if e.error_code in [mvsdk.CAMERA_STATUS_INVALID_HANDLE, mvsdk.CAMERA_STATUS_INVALID_PARAM]:
|
||||
self.logger.error("Invalid camera handle when releasing buffer - stopping streaming loop")
|
||||
break
|
||||
|
||||
# Control frame rate
|
||||
time.sleep(1.0 / self.preview_fps)
|
||||
@@ -491,6 +549,15 @@ class CameraStreamer:
|
||||
self.logger.error(f"Fatal error in streaming loop: {e}")
|
||||
finally:
|
||||
self.logger.info("Streaming loop ended")
|
||||
# Reset streaming flag when loop ends
|
||||
with self._lock:
|
||||
self.streaming = False
|
||||
# Cleanup camera resources if not already done
|
||||
if self.hCamera is not None and not self._using_shared_camera:
|
||||
try:
|
||||
self._cleanup_camera()
|
||||
except Exception as cleanup_e:
|
||||
self.logger.error(f"Error during cleanup after loop ended: {cleanup_e}")
|
||||
|
||||
def _convert_frame_to_opencv(self, FrameHead) -> Optional[np.ndarray]:
|
||||
"""Convert camera frame to OpenCV format"""
|
||||
|
||||
@@ -208,23 +208,50 @@ class USDAVisionSystem:
|
||||
def run(self) -> None:
|
||||
"""Run the system (blocking call)"""
|
||||
if not self.start():
|
||||
self.logger.error("Failed to start system")
|
||||
return
|
||||
self.logger.error("Failed to start system - some components may not be available")
|
||||
# Don't exit immediately - allow partial operation if some components started
|
||||
# Only exit if critical components failed
|
||||
if not self.running:
|
||||
self.logger.critical("Critical components failed to start - exiting")
|
||||
return
|
||||
|
||||
try:
|
||||
self.logger.info("System running... Press Ctrl+C to stop")
|
||||
|
||||
# Main loop - just keep the system alive
|
||||
consecutive_errors = 0
|
||||
max_consecutive_errors = 10
|
||||
|
||||
while self.running:
|
||||
time.sleep(1)
|
||||
try:
|
||||
time.sleep(1)
|
||||
consecutive_errors = 0 # Reset on successful iteration
|
||||
|
||||
# Periodic maintenance tasks could go here
|
||||
# For example: cleanup old recordings, health checks, etc.
|
||||
# Periodic maintenance tasks could go here
|
||||
# For example: cleanup old recordings, health checks, etc.
|
||||
|
||||
# Health check: verify critical components are still running
|
||||
if not self.mqtt_client.is_running():
|
||||
self.logger.warning("MQTT client stopped running - attempting restart")
|
||||
try:
|
||||
self.mqtt_client.start()
|
||||
except Exception as e:
|
||||
self.logger.error(f"Failed to restart MQTT client: {e}")
|
||||
consecutive_errors += 1
|
||||
|
||||
except Exception as e:
|
||||
consecutive_errors += 1
|
||||
self.logger.error(f"Error in main loop (consecutive: {consecutive_errors}): {e}", exc_info=True)
|
||||
|
||||
# If too many consecutive errors, exit to prevent infinite crash loop
|
||||
if consecutive_errors >= max_consecutive_errors:
|
||||
self.logger.critical(f"Too many consecutive errors ({consecutive_errors}) - shutting down to prevent crash loop")
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
self.logger.info("Keyboard interrupt received")
|
||||
except Exception as e:
|
||||
self.logger.error(f"Unexpected error in main loop: {e}")
|
||||
self.logger.error(f"Unexpected error in main loop: {e}", exc_info=True)
|
||||
finally:
|
||||
self.stop()
|
||||
|
||||
@@ -270,8 +297,14 @@ def main():
|
||||
|
||||
try:
|
||||
system.run()
|
||||
except KeyboardInterrupt:
|
||||
logging.info("Interrupted by user")
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
logging.error(f"Fatal error: {e}")
|
||||
logging.critical(f"Fatal error: {e}", exc_info=True)
|
||||
# Give a moment for logs to flush
|
||||
import time
|
||||
time.sleep(1)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user