#!/bin/bash
#
# Diagnostic script to investigate container crashes.
#
# Usage:
#   <script> [container-name] [compose-service]
#
# Arguments:
#   container-name   Container to inspect (default: usda-vision-api)
#   compose-service  Service passed to `docker compose ps` (default: api)
#
# Every check is best-effort: a failing check prints a short fallback message
# and the script carries on, so strict mode (`set -e`) is deliberately not
# enabled here.

CONTAINER_NAME="${1:-usda-vision-api}"
COMPOSE_SERVICE="${2:-api}"

#######################################
# Print a three-line banner followed by a blank line.
# Arguments: $1 - banner title
# Outputs:   banner on stdout
#######################################
print_banner() {
  echo "=========================================="
  echo "$1"
  echo "=========================================="
  echo ""
}

#######################################
# Show the most recent log lines that match common failure keywords.
# Globals:   CONTAINER_NAME (read)
# Outputs:   up to 20 matching lines, or a notice when nothing matched
#######################################
scan_log_errors() {
  local matches
  # Logs are streamed through the filter rather than captured whole, since a
  # container log can be arbitrarily large.
  matches=$(docker logs "$CONTAINER_NAME" 2>&1 \
    | grep -iE "error|exception|failed|fatal|traceback|crash" \
    | tail -20)
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
  else
    echo "No common error patterns found in logs"
  fi
}

#######################################
# List Python processes running inside the container.
# Globals:   CONTAINER_NAME (read)
# Outputs:   matching `ps aux` lines, or a message explaining why none are shown
#######################################
check_python_process() {
  local ps_output
  # Test the exit status of `docker exec` itself. Hanging the fallback off a
  # `docker exec | grep` pipeline would report "Cannot access container"
  # whenever grep simply found no match.
  if ! ps_output=$(docker exec "$CONTAINER_NAME" ps aux 2>/dev/null); then
    echo "Cannot access container"
    return 0
  fi
  grep python <<<"$ps_output" || echo "No Python process found in container"
}

#######################################
# Report recent kernel "killed process" messages (typical of OOM kills).
# Outputs:   up to 5 matching dmesg lines, or a notice when there are none
#######################################
check_oom_kills() {
  local kills
  # The status of `dmesg | grep | tail` is that of `tail`, which succeeds even
  # on empty input, so the result is tested for emptiness instead of using `||`.
  kills=$(dmesg 2>/dev/null | grep -i "killed process" | tail -5)
  if [[ -n "$kills" ]]; then
    printf '%s\n' "$kills"
  else
    echo "No OOM kills found in dmesg (may require sudo)"
  fi
}

#######################################
# Print the list of usual suspects for a crashing container.
#######################################
print_common_causes() {
  echo "Common causes of crashes:"
  echo " - MQTT connection failure (check broker at 192.168.1.110:1883)"
  echo " - Camera SDK initialization failure"
  echo " - Storage path issues (/mnt/nfs_share)"
  echo " - Out of memory (OOM)"
  echo " - Missing config file"
  echo " - Python exception not caught"
  echo ""
}

#######################################
# Run every diagnostic step in order.
# Globals:   CONTAINER_NAME, COMPOSE_SERVICE (read)
# Outputs:   diagnostic report on stdout, warnings on stderr
# Returns:   0 always (checks are best-effort)
#######################################
main() {
  if ! command -v docker >/dev/null 2>&1; then
    echo "Warning: docker CLI not found in PATH; most checks below will fail" >&2
  fi

  print_banner "Container Crash Diagnostic"

  echo "1. Checking container status..."
  docker ps -a | grep -F -- "$CONTAINER_NAME" || echo "Container not found"
  echo ""

  echo "2. Recent container exit codes and status..."
  docker inspect "$CONTAINER_NAME" \
    --format='{{.State.Status}} - Exit Code: {{.State.ExitCode}} - Started: {{.State.StartedAt}} - Finished: {{.State.FinishedAt}}' \
    2>/dev/null || echo "Container not found"
  echo ""

  echo "3. Last 50 lines of container logs..."
  docker logs "$CONTAINER_NAME" --tail 50 2>&1
  echo ""

  echo "4. Checking for common error patterns..."
  scan_log_errors
  echo ""

  echo "5. Checking container resource usage (if running)..."
  docker stats "$CONTAINER_NAME" --no-stream 2>/dev/null \
    || echo "Container not running"
  echo ""

  echo "6. Checking if config file exists in container..."
  docker exec "$CONTAINER_NAME" ls -la /app/config.compose.json 2>/dev/null \
    || echo "Cannot access container or file missing"
  echo ""

  echo "7. Checking Python process..."
  check_python_process
  echo ""

  echo "8. Checking for OOM (Out of Memory) kills..."
  check_oom_kills
  echo ""

  echo "9. Container restart count..."
  docker inspect "$CONTAINER_NAME" --format='Restart Count: {{.RestartCount}}' \
    2>/dev/null || echo "Container not found"
  echo ""

  echo "10. Checking docker-compose status..."
  # NOTE(review): presumably this must be run from the compose project
  # directory for the service to be found; confirm for your deployment.
  docker compose ps "$COMPOSE_SERVICE" 2>/dev/null \
    || echo "Cannot check compose status"
  echo ""

  print_banner "Diagnostic Complete"
  print_common_causes
}

main "$@"