Update Docker configuration, enhance error handling, and improve logging

- Added health check to the camera management API service in docker-compose.yml for better container reliability.
- Updated installation scripts in Dockerfile to check for existing dependencies before installation, improving efficiency.
- Enhanced error handling in the USDAVisionSystem class to allow partial operation if some components fail to start, preventing immediate shutdown.
- Improved logging throughout the application, including more detailed error messages and critical error handling in the main loop.
- Refactored WebSocketManager and CameraMonitor classes to use debug logging for connection events, reducing log noise.
This commit is contained in:
salirezav
2025-12-03 17:23:31 -05:00
parent 2bce817b4e
commit 933d4417a5
30 changed files with 4314 additions and 220 deletions

View File

@@ -0,0 +1,61 @@
#!/bin/bash
# Diagnostic script to investigate container crashes.
#
# Usage: <script> [container-name]
#   container-name  Docker container to inspect (default: usda-vision-api).
#
# The script is deliberately best-effort: it does NOT use `set -e`, because
# every check should still run (and print its own fallback message) when an
# earlier check fails, e.g. because the container is stopped or missing.

# Print stdin unchanged, or print "$1" when stdin carries no output.
# Needed after pipelines ending in grep/tail, whose exit status cannot be
# used to tell "no matches" apart from "matches found".
or_message() {
  local fallback=$1
  local captured
  captured=$(cat)
  if [[ -n "$captured" ]]; then
    printf '%s\n' "$captured"
  else
    printf '%s\n' "$fallback"
  fi
}

# Run all diagnostic checks against the container named by "$1".
main() {
  local container="${1:-usda-vision-api}"
  local compose_service="api"
  local ps_output kernel_log

  # Warn but keep going: the dmesg check and the hints at the end are still
  # useful without the docker CLI.
  command -v docker >/dev/null 2>&1 \
    || echo "Warning: docker CLI not found in PATH; Docker checks will fail" >&2

  echo "=========================================="
  echo "Container Crash Diagnostic"
  echo "=========================================="
  echo ""

  echo "1. Checking container status..."
  docker ps -a | grep -F -- "$container" | or_message "Container not found"
  echo ""

  echo "2. Recent container exit codes and status..."
  docker inspect "$container" \
    --format='{{.State.Status}} - Exit Code: {{.State.ExitCode}} - Started: {{.State.StartedAt}} - Finished: {{.State.FinishedAt}}' \
    2>/dev/null || echo "Container not found"
  echo ""

  echo "3. Last 50 lines of container logs..."
  docker logs "$container" --tail 50 2>&1
  echo ""

  echo "4. Checking for common error patterns..."
  docker logs "$container" 2>&1 \
    | grep -iE "error|exception|failed|fatal|traceback|crash" \
    | tail -20 \
    | or_message "No common error patterns found in logs"
  echo ""

  echo "5. Checking container resource usage (if running)..."
  docker stats "$container" --no-stream 2>/dev/null || echo "Container not running"
  echo ""

  echo "6. Checking if config file exists in container..."
  docker exec "$container" ls -la /app/config.compose.json 2>/dev/null \
    || echo "Cannot access container or file missing"
  echo ""

  echo "7. Checking Python process..."
  # Capture `docker exec` separately so its failure (container down, exec
  # refused) is reported as such instead of being confused with "grep found
  # no python process".
  if ps_output=$(docker exec "$container" ps aux 2>/dev/null); then
    printf '%s\n' "$ps_output" | grep python \
      | or_message "No Python process found in container"
  else
    echo "Cannot access container"
  fi
  echo ""

  echo "8. Checking for OOM (Out of Memory) kills..."
  # dmesg is often restricted to root; distinguish "cannot read the kernel
  # log" from "kernel log readable but contains no OOM kills".
  if kernel_log=$(dmesg 2>/dev/null); then
    printf '%s\n' "$kernel_log" | grep -i "killed process" | tail -5 \
      | or_message "No OOM kills found in dmesg"
  else
    echo "Cannot read dmesg (may require sudo)"
  fi
  echo ""

  echo "9. Container restart count..."
  docker inspect "$container" --format='Restart Count: {{.RestartCount}}' 2>/dev/null \
    || echo "Container not found"
  echo ""

  echo "10. Checking docker-compose status..."
  docker compose ps "$compose_service" 2>/dev/null || echo "Cannot check compose status"
  echo ""

  echo "=========================================="
  echo "Diagnostic Complete"
  echo "=========================================="
  echo ""
  echo "Common causes of crashes:"
  echo " - MQTT connection failure (check broker at 192.168.1.110:1883)"
  echo " - Camera SDK initialization failure"
  echo " - Storage path issues (/mnt/nfs_share)"
  echo " - Out of memory (OOM)"
  echo " - Missing config file"
  echo " - Python exception not caught"
  echo ""
}

main "$@"