bottleneck_finder.sh

Download
bash 427 lines 12.0 KB
  1#!/usr/bin/env bash
  2#
  3# Performance Bottleneck Finder
  4#
  5# This script diagnoses system performance bottlenecks by analyzing:
  6# - CPU usage and load
  7# - Memory usage and swap
  8# - Disk I/O
  9# - Network I/O
 10# - Process resource usage
 11#
 12# Features:
 13# - Color-coded output with warning/critical thresholds
 14# - Summary report generation
 15# - Export to file
 16# - Multiple analysis modes
 17
 18set -euo pipefail
 19
 20#------------------------------------------------------------------------------
 21# Configuration
 22#------------------------------------------------------------------------------
 23
 24readonly SCRIPT_NAME="$(basename "$0")"
 25
 26# Thresholds
 27readonly CPU_WARNING=70
 28readonly CPU_CRITICAL=90
 29readonly MEM_WARNING=75
 30readonly MEM_CRITICAL=90
 31readonly DISK_IO_WARNING=80
 32readonly DISK_IO_CRITICAL=95
 33readonly LOAD_WARNING=2.0
 34readonly LOAD_CRITICAL=4.0
 35
 36# Colors
 37readonly RED='\033[0;31m'
 38readonly YELLOW='\033[1;33m'
 39readonly GREEN='\033[0;32m'
 40readonly BLUE='\033[0;34m'
 41readonly NC='\033[0m' # No Color
 42
 43# Output settings
 44OUTPUT_FILE=""
 45VERBOSE=false
 46
#------------------------------------------------------------------------------
# Functions
#------------------------------------------------------------------------------
 50
 51usage() {
 52    cat <<EOF
 53Usage: $SCRIPT_NAME [OPTIONS]
 54
 55Diagnose system performance bottlenecks.
 56
 57OPTIONS:
 58    -h, --help              Show this help message
 59    -o, --output FILE       Export report to file
 60    -v, --verbose           Verbose output
 61    -a, --analyze TYPE      Analyze specific type: cpu|memory|disk|network|process|all
 62
 63EXAMPLES:
 64    $SCRIPT_NAME                        # Full analysis
 65    $SCRIPT_NAME -a cpu                 # CPU analysis only
 66    $SCRIPT_NAME -o report.txt          # Export to file
 67
 68EOF
 69}
 70
 71print_header() {
 72    local title="$1"
 73    echo ""
 74    echo -e "${BLUE}========================================${NC}"
 75    echo -e "${BLUE}${title}${NC}"
 76    echo -e "${BLUE}========================================${NC}"
 77}
 78
 79print_status() {
 80    local value="$1"
 81    local threshold_warn="$2"
 82    local threshold_crit="$3"
 83    local unit="${4:-%}"
 84
 85    if (( $(echo "$value >= $threshold_crit" | bc -l) )); then
 86        echo -e "${RED}${value}${unit} [CRITICAL]${NC}"
 87    elif (( $(echo "$value >= $threshold_warn" | bc -l) )); then
 88        echo -e "${YELLOW}${value}${unit} [WARNING]${NC}"
 89    else
 90        echo -e "${GREEN}${value}${unit} [OK]${NC}"
 91    fi
 92}
 93
 94analyze_cpu() {
 95    print_header "CPU Analysis"
 96
 97    echo "CPU Information:"
 98    if command -v lscpu &>/dev/null; then
 99        lscpu | grep -E "^(Model name|CPU\(s\)|Thread|Core|Socket)"
100    fi
101
102    echo ""
103    echo "Current CPU Usage:"
104
105    # Get CPU usage (requires top or mpstat)
106    if command -v mpstat &>/dev/null; then
107        local cpu_usage
108        cpu_usage=$(mpstat 1 1 | awk '/Average:/ {print 100 - $NF}')
109        echo -n "  Overall CPU: "
110        print_status "$cpu_usage" "$CPU_WARNING" "$CPU_CRITICAL"
111
112        # Per-CPU stats
113        if [[ "$VERBOSE" == "true" ]]; then
114            echo ""
115            echo "  Per-CPU usage:"
116            mpstat -P ALL 1 1 | tail -n +4
117        fi
118    else
119        # Fallback to top
120        local cpu_usage
121        cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)
122        echo -n "  CPU Usage: "
123        print_status "$cpu_usage" "$CPU_WARNING" "$CPU_CRITICAL"
124    fi
125
126    echo ""
127    echo "Load Average:"
128    local load_1min load_5min load_15min
129    read -r load_1min load_5min load_15min _ _ < /proc/loadavg
130
131    echo -n "  1-min:  "
132    print_status "$load_1min" "$LOAD_WARNING" "$LOAD_CRITICAL" ""
133
134    echo -n "  5-min:  "
135    print_status "$load_5min" "$LOAD_WARNING" "$LOAD_CRITICAL" ""
136
137    echo -n "  15-min: "
138    print_status "$load_15min" "$LOAD_WARNING" "$LOAD_CRITICAL" ""
139
140    echo ""
141    echo "Top CPU Consumers:"
142    ps aux --sort=-%cpu | head -6 | awk 'NR==1 {print "  " $0} NR>1 {printf "  %-10s %5s %5s %s\n", $1, $3, $4, $11}'
143}
144
#######################################
# Report RAM and swap utilisation (from /proc/meminfo, values in kB) and the
# top memory-consuming processes.
# Globals:   MEM_WARNING, MEM_CRITICAL (read)
# Arguments: none
# Outputs:   human-readable report on stdout
#######################################
analyze_memory() {
    print_header "Memory Analysis"

    local have_meminfo=false
    local mem_total=0 mem_available=0 swap_total=0 swap_free=0

    if [[ -r /proc/meminfo ]]; then
        have_meminfo=true
        # One pass over /proc/meminfo. When the MemAvailable line is absent,
        # MemFree + Buffers + Cached is used as an approximation so the
        # arithmetic below never sees an empty operand.
        read -r mem_total mem_available swap_total swap_free < <(
            awk '
                /^MemTotal:/     { total = $2 }
                /^MemAvailable:/ { avail = $2; have_avail = 1 }
                /^MemFree:/      { free = $2 }
                /^Buffers:/      { buffers = $2 }
                /^Cached:/       { cached = $2 }
                /^SwapTotal:/    { swap_total = $2 }
                /^SwapFree:/     { swap_free = $2 }
                END {
                    if (!have_avail) avail = free + buffers + cached
                    printf "%.0f %.0f %.0f %.0f\n", total, avail, swap_total, swap_free
                }' /proc/meminfo
        )
    fi

    echo "Memory Usage:"
    if [[ "$have_meminfo" == "true" ]] && (( mem_total > 0 )); then
        local mem_used mem_percent

        mem_used=$((mem_total - mem_available))
        mem_percent=$(awk -v used="$mem_used" -v total="$mem_total" \
            'BEGIN {printf "%.1f", (used / total) * 100}')

        echo "  Total:     $((mem_total / 1024)) MB"
        echo "  Used:      $((mem_used / 1024)) MB"
        echo "  Available: $((mem_available / 1024)) MB"
        echo -n "  Usage:     "
        print_status "$mem_percent" "$MEM_WARNING" "$MEM_CRITICAL"
    else
        echo "  Memory statistics unavailable"
    fi

    echo ""
    echo "Swap Usage:"
    if [[ "$have_meminfo" != "true" ]]; then
        echo "  Swap statistics unavailable"
    elif (( swap_total > 0 )); then
        local swap_used swap_percent

        swap_used=$((swap_total - swap_free))
        swap_percent=$(awk -v used="$swap_used" -v total="$swap_total" \
            'BEGIN {printf "%.1f", (used / total) * 100}')

        echo "  Total:     $((swap_total / 1024)) MB"
        echo "  Used:      $((swap_used / 1024)) MB"
        echo -n "  Usage:     "
        print_status "$swap_percent" "$MEM_WARNING" "$MEM_CRITICAL"
    else
        echo "  No swap configured"
    fi

    echo ""
    echo "Top Memory Consumers:"
    # Row limit is applied inside awk (not `head -6`) so ps is never cut off
    # with SIGPIPE, which pipefail would turn into a script abort.
    ps aux --sort=-%mem | awk '
        NR == 1           { print "  " $0 }
        NR > 1 && NR <= 6 { printf "  %-10s %5s %5s %s\n", $1, $3, $4, $11 }'
}
193
#######################################
# Report filesystem usage for /dev/* devices, disk I/O statistics and
# warnings for filesystems that are at least 75% (warning) or 90% (critical)
# full.
# Globals:   none
# Arguments: none
# Outputs:   human-readable report on stdout
#######################################
analyze_disk() {
    print_header "Disk I/O Analysis"

    # Take one df snapshot for both sections below. -P keeps every filesystem
    # on a single line even when the device name is long. df can return
    # non-zero (and complain on stderr) when a mount cannot be inspected while
    # still listing the others, so neither is allowed to abort the report.
    local df_output
    df_output=$(df -hP 2>/dev/null) || true

    echo "Disk Usage:"
    if [[ -n "$df_output" ]]; then
        awk 'NR==1 {print "  " $0} NR>1 && $1 ~ /^\/dev\// {print "  " $0}' \
            <<<"$df_output"
    else
        echo "  df output not available"
    fi

    echo ""
    echo "Disk I/O Statistics:"

    if command -v iostat &>/dev/null; then
        # Skip the three banner lines and drop empty lines; awk never fails
        # on "no output", unlike grep -v.
        iostat -x 1 2 | awk 'NR >= 4 && $0 != ""' \
            || echo "  iostat failed"
    else
        echo "  iostat not available"

        if [[ -f /proc/diskstats ]]; then
            echo "  Raw disk stats from /proc/diskstats:"
            # Field 3 is the device name, 4 reads completed, 8 writes completed.
            awk '{if ($4 > 0) print "  " $3, "reads:", $4, "writes:", $8}' /proc/diskstats
        fi
    fi

    echo ""
    echo "Disk Space Warnings:"
    if [[ -n "$df_output" ]]; then
        awk 'NR>1 && $1 ~ /^\/dev\// {
            # substr() yields a string; "+ 0" forces a numeric comparison.
            # Without it the test is lexical ("100" < "90", "8" > "75").
            usage = substr($5, 1, length($5) - 1) + 0
            # Mount points may contain spaces: rejoin fields 6..NF.
            mount = $6
            for (i = 7; i <= NF; i++) mount = mount " " $i
            if (usage >= 90) {
                print "  [CRITICAL] " mount " is " usage "% full"
            } else if (usage >= 75) {
                print "  [WARNING] " mount " is " usage "% full"
            }
        }' <<<"$df_output"
    fi
}
225
#######################################
# Report per-interface RX/TX byte counters, a connection summary and the
# processes that own the most sockets.
# Globals:   none
# Arguments: none
# Outputs:   human-readable report on stdout
#######################################
analyze_network() {
    print_header "Network Analysis"

    echo "Network Interfaces:"
    if command -v ip &>/dev/null; then
        # awk sees the complete `ip -s link` output: the counters are on the
        # line FOLLOWING each "RX:" / "TX:" heading, so those lines must not
        # be filtered away before getline reads them. The first number on the
        # counter line is the byte count.
        ip -s link | awk '
            /^[0-9]+:/  { printf "  %-20s", $2; next }
            $1 == "RX:" { if ((getline) > 0) printf "RX: %s bytes  ", $1; next }
            $1 == "TX:" { if ((getline) > 0) printf "TX: %s bytes\n", $1; next }'
    elif command -v ifconfig &>/dev/null; then
        # grep returns 1 when nothing matches; that is not an error here.
        ifconfig | grep -E "^[a-z]|RX packets|TX packets" || true
    else
        echo "  Interface statistics not available (requires ip or ifconfig)"
    fi

    echo ""
    echo "Network Connections:"
    if command -v ss &>/dev/null; then
        echo "  Active connections by state:"
        ss -s | awk '!/^Total:/'
    elif command -v netstat &>/dev/null; then
        echo "  Active connections:"
        netstat -an | awk '/^tcp/ {print $6}' | sort | uniq -c | sort -rn | awk '{print "  " $2 ": " $1}'
    else
        echo "  Connection statistics not available (requires ss or netstat)"
    fi

    echo ""
    echo "Top Network Processes:"
    if command -v ss &>/dev/null; then
        # Extract every quoted process name from the users:(("name",...))
        # column. A non-greedy "[^"]*" match keeps sockets with several owners
        # from collapsing into one garbled name; each owner is counted once
        # per socket. Every stage exits 0 on empty input, so a host with no
        # visible socket owners does not abort the script under pipefail.
        ss -tunap 2>/dev/null \
            | awk '{
                rest = $0
                while (match(rest, /"[^"]*"/)) {
                    if (RLENGTH > 2) print substr(rest, RSTART + 1, RLENGTH - 2)
                    rest = substr(rest, RSTART + RLENGTH)
                }
            }' \
            | sort | uniq -c | sort -rn \
            | awk 'NR <= 5 {
                count = $1
                $1 = ""
                sub(/^ +/, "")
                print "  " $0 " (" count " connections)"
            }'
    else
        echo "  Detailed process info not available (requires ss)"
    fi
}
262
#######################################
# Report process counts by state (running / sleeping / zombie) and the five
# heaviest processes by CPU and by memory.
# Globals:   RED, NC (read)
# Arguments: none
# Outputs:   human-readable report on stdout
#######################################
analyze_processes() {
    print_header "Process Analysis"

    echo "Process Count:"
    local total_procs running_procs sleeping_procs zombie_procs
    local ps_snapshot

    # One snapshot for all four counters so they describe the same moment.
    ps_snapshot=$(ps aux)

    # NR > 1 skips the header row, whose "STAT" label would otherwise be
    # counted in the total and as a sleeping process. The state is the first
    # character of the STAT column (field 8).
    read -r total_procs running_procs sleeping_procs zombie_procs < <(
        awk 'NR > 1 {
                 total++
                 state = substr($8, 1, 1)
                 if (state == "R")      running++
                 else if (state == "S") sleeping++
                 else if (state == "Z") zombie++
             }
             END { print total + 0, running + 0, sleeping + 0, zombie + 0 }' \
            <<<"$ps_snapshot"
    )

    echo "  Total:    $total_procs"
    echo "  Running:  $running_procs"
    echo "  Sleeping: $sleeping_procs"

    if [[ $zombie_procs -gt 0 ]]; then
        echo -e "  ${RED}Zombies:  $zombie_procs [WARNING]${NC}"
    else
        echo "  Zombies:  $zombie_procs"
    fi

    # For both listings the header row is skipped (it would be pushed through
    # the per-process format) and the row limit is applied inside awk, so ps
    # is never cut off with SIGPIPE, which pipefail would turn into an abort.
    echo ""
    echo "Top Processes by CPU:"
    ps aux --sort=-%cpu | awk '
        NR > 1 && NR <= 6 { printf "  %-10s %5s%% CPU  %5s%% MEM  %s\n", $1, $3, $4, $11 }'

    echo ""
    echo "Top Processes by Memory:"
    ps aux --sort=-%mem | awk '
        NR > 1 && NR <= 6 { printf "  %-10s %5s%% CPU  %5s%% MEM  %s\n", $1, $3, $4, $11 }'
}
292
#######################################
# Print a short list of detected bottlenecks with remediation hints:
# critical CPU usage, critical memory usage, high swap usage and zombie
# processes. Prints a "no critical issues" line when none are found.
# Globals:   CPU_CRITICAL, MEM_CRITICAL, MEM_WARNING, RED, YELLOW, GREEN,
#            NC (read)
# Arguments: none
# Outputs:   human-readable summary on stdout
# Notes:     the CPU check samples mpstat for one second and is skipped when
#            mpstat is not installed.
#######################################
generate_summary() {
    print_header "Summary & Recommendations"

    echo "Potential Bottlenecks:"

    local issues_found=false
    # Exits 0 when reading v is non-empty and >= threshold t. Used instead of
    # bc so an empty reading is treated as "no finding" rather than an error.
    local at_least='BEGIN { exit !(v != "" && v + 0 >= t + 0) }'

    # CPU check
    if command -v mpstat &>/dev/null; then
        local cpu_usage
        # LC_ALL=C pins the "Average:" label and the decimal point.
        cpu_usage=$(LC_ALL=C mpstat 1 1 \
            | awk '/Average:/ {print 100 - $NF}') || cpu_usage=""
        if awk -v v="$cpu_usage" -v t="$CPU_CRITICAL" "$at_least"; then
            echo -e "  ${RED}[CRITICAL]${NC} CPU usage is very high ($cpu_usage%)"
            echo "    - Check top CPU consumers"
            echo "    - Consider scaling up CPU resources"
            issues_found=true
        fi
    fi

    # Memory and swap percentages. /proc/meminfo is preferred and memory is
    # computed as (MemTotal - MemAvailable) / MemTotal, with
    # MemFree + Buffers + Cached standing in when MemAvailable is absent.
    # `free` is only a fallback.
    local mem_percent="" swap_percent=""
    if [[ -r /proc/meminfo ]]; then
        read -r mem_percent swap_percent < <(
            awk '
                /^MemTotal:/     { total = $2 }
                /^MemAvailable:/ { avail = $2; have_avail = 1 }
                /^MemFree:/      { free = $2 }
                /^Buffers:/      { buffers = $2 }
                /^Cached:/       { cached = $2 }
                /^SwapTotal:/    { swap_total = $2 }
                /^SwapFree:/     { swap_free = $2 }
                END {
                    if (!have_avail) avail = free + buffers + cached
                    mem  = (total > 0) ? (total - avail) / total * 100 : 0
                    swap = (swap_total > 0) ? (swap_total - swap_free) / swap_total * 100 : 0
                    printf "%.1f %.1f\n", mem, swap
                }' /proc/meminfo
        )
    elif command -v free &>/dev/null; then
        read -r mem_percent swap_percent < <(
            LC_ALL=C free | awk '
                /^Mem:/  { mem  = ($2 > 0) ? $3 / $2 * 100 : 0 }
                /^Swap:/ { swap = ($2 > 0) ? $3 / $2 * 100 : 0 }
                END { printf "%.1f %.1f\n", mem, swap }'
        )
    fi

    # Memory check
    if awk -v v="$mem_percent" -v t="$MEM_CRITICAL" "$at_least"; then
        echo -e "  ${RED}[CRITICAL]${NC} Memory usage is very high ($mem_percent%)"
        echo "    - Check for memory leaks"
        echo "    - Review top memory consumers"
        issues_found=true
    fi

    # Swap check
    if awk -v v="$swap_percent" -v t="$MEM_WARNING" "$at_least"; then
        echo -e "  ${YELLOW}[WARNING]${NC} High swap usage ($swap_percent%)"
        echo "    - System may be thrashing"
        echo "    - Consider adding more RAM"
        issues_found=true
    fi

    # Zombie processes: state is the first character of the STAT column;
    # the header row is skipped.
    local zombie_count
    zombie_count=$(ps aux | awk 'NR > 1 && $8 ~ /^Z/ {count++} END {print count+0}')
    if [[ $zombie_count -gt 0 ]]; then
        echo -e "  ${YELLOW}[WARNING]${NC} Zombie processes detected ($zombie_count)"
        echo "    - Check parent processes"
        issues_found=true
    fi

    if [[ "$issues_found" == "false" ]]; then
        echo -e "  ${GREEN}No critical issues detected${NC}"
    fi
}
345
#------------------------------------------------------------------------------
# Main
#------------------------------------------------------------------------------
349
# Print the report banner and run the analyses selected by $1
# (all|cpu|memory|disk|network|process). main validates the value first.
_run_report() {
    local analyze_type="$1"

    echo "System Performance Bottleneck Analysis"
    echo "Generated: $(date)"
    # bash provides HOSTNAME; the hostname command is only a fallback.
    echo "Hostname: ${HOSTNAME:-$(hostname)}"

    case "$analyze_type" in
        all)
            analyze_cpu
            analyze_memory
            analyze_disk
            analyze_network
            analyze_processes
            generate_summary
            ;;
        cpu)
            analyze_cpu
            ;;
        memory)
            analyze_memory
            ;;
        disk)
            analyze_disk
            ;;
        network)
            analyze_network
            ;;
        process)
            analyze_processes
            ;;
    esac
}

#######################################
# Parse command-line options and run the requested analysis.
# Globals:   OUTPUT_FILE, VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   report on stdout (duplicated into OUTPUT_FILE when -o is given);
#            option errors and the accompanying usage text on stderr
# Returns:   exits 0 after --help, 1 on an invalid option or analysis type
#######################################
main() {
    local analyze_type="all"

    # Parse arguments
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage
                exit 0
                ;;
            -o|--output)
                # Check before touching $2: under `set -u` a missing value
                # would otherwise die with an "unbound variable" error.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    echo "Option $1 requires a FILE argument" >&2
                    usage >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            -v|--verbose)
                VERBOSE=true
                shift
                ;;
            -a|--analyze)
                if [[ $# -lt 2 || -z "$2" ]]; then
                    echo "Option $1 requires a TYPE argument" >&2
                    usage >&2
                    exit 1
                fi
                analyze_type="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                usage >&2
                exit 1
                ;;
        esac
    done

    # Reject an unknown type before producing output or creating the file.
    case "$analyze_type" in
        all|cpu|memory|disk|network|process)
            ;;
        *)
            echo "Unknown analysis type: $analyze_type" >&2
            usage >&2
            exit 1
            ;;
    esac

    if [[ -n "$OUTPUT_FILE" ]]; then
        # A regular pipeline (instead of `exec > >(tee ...)`) makes the shell
        # wait for tee, so the file is complete when the script returns.
        {
            _run_report "$analyze_type"
            echo ""
            echo "Report saved to: $OUTPUT_FILE"
        } | tee -- "$OUTPUT_FILE"
    else
        _run_report "$analyze_type"
    fi
}
425
# Entry point: forward all command-line arguments to main.
main "$@"