#!/usr/bin/env bash
#
# csv_parser.sh
#
# CSV Parser
# Reads CSV files, supports querying, and pretty-prints as a table.
#
# Run the file directly to see a demo, or `source` it to load the functions
# into the current shell.

# Strict mode is enabled only when the file is executed. When the file is
# sourced, `set -e` would otherwise leak into the caller's shell and make an
# interactive session exit on the first failing command.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    set -euo pipefail
fi

# Global arrays to store CSV data.
# -g keeps them global even if the file is sourced from inside a function;
# the explicit "=()" makes them safe to expand under `set -u` before any
# file has been parsed.
declare -ga headers=()
declare -ga rows=()
 # CSV loading, lookup and display functions.
#
# Storage format shared by every function below:
#   headers  - one array element per column name
#   rows     - one string per data row; every field (including the last one)
#              is followed by CSV_FIELD_SEP
# A control character is used as the separator because a comma may legally
# appear inside a quoted CSV field. Terminating (rather than joining) the
# fields lets `read -a` recover trailing empty fields when a row is split.
# Limitation: field data that itself contains the separator byte is not
# supported.
CSV_FIELD_SEP=$'\x1f'

# Split one stored row back into its fields.
#   $1 - row string as stored in "rows"
#   $2 - name of the array variable that receives the fields
_csv_split_row() {
    IFS="$CSV_FIELD_SEP" read -r -a "$2" <<< "$1"
}

# Parse CSV file
#   $1 - path of the CSV file
# The first non-blank line becomes "headers"; every later non-blank line is
# appended to "rows". Both globals are reset first, so parsing a second file
# replaces the previous data instead of appending to it.
# Prints a one-line summary to stdout. Returns 1 (message on stderr) when the
# file does not exist.
parse_csv() {
    local file="${1-}"
    local line cell serialized
    local -a fields
    local header_seen=0

    if [[ ! -f "$file" ]]; then
        echo "Error: File '$file' not found" >&2
        return 1
    fi

    headers=()
    rows=()

    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        line=${line%$'\r'}          # tolerate CRLF line endings
        if [[ -z "$line" ]]; then
            continue                # blank lines carry no record
        fi

        # Parse CSV line handling quoted fields
        parse_csv_line "$line" fields

        if [[ $header_seen -eq 0 ]]; then
            headers=("${fields[@]}")
            header_seen=1
        else
            # Bash has no arrays of arrays, so each row is serialized.
            serialized=""
            for cell in "${fields[@]}"; do
                serialized+="${cell}${CSV_FIELD_SEP}"
            done
            rows+=("$serialized")
        fi
    done < "$file"

    echo "Parsed ${#rows[@]} data rows with ${#headers[@]} columns"
}

# Parse a single CSV line into an array
#   $1 - the raw line
#   $2 - name of the array variable that receives the fields
# Handles quoted fields with commas inside; a doubled quote ("") inside a
# quoted field yields one literal quote. The result always has at least one
# element (an empty line gives a single empty field).
# Locals carry a "__" prefix so that the nameref cannot accidentally resolve
# to one of this function's own variables when the caller's array has a
# common name such as "line" or "field".
parse_csv_line() {
    local __line="$1"
    local -n __out="$2"
    local __field=""
    local __char
    local __quoted=0
    local __i
    local __len=${#__line}

    __out=()

    for ((__i = 0; __i < __len; __i++)); do
        __char=${__line:__i:1}

        if [[ "$__char" == '"' ]]; then
            if [[ $__quoted -eq 1 ]]; then
                # Check for escaped quote ("")
                if [[ "${__line:__i+1:1}" == '"' ]]; then
                    __field+='"'
                    # Skip the second quote. A plain assignment is used
                    # because ((x++)) can return a failure status and trip
                    # `set -e`.
                    __i=$((__i + 1))
                else
                    __quoted=0
                fi
            else
                __quoted=1
            fi
        elif [[ "$__char" == ',' && $__quoted -eq 0 ]]; then
            __out+=("$__field")
            __field=""
        else
            __field+="$__char"
        fi
    done

    # Add last field
    __out+=("$__field")
}

# Get column index by header name
#   $1 - exact header name
# Prints the zero-based index on stdout. Returns 1 (message on stderr) when
# no header matches.
get_column_index() {
    local column_name="$1"
    local i

    for i in "${!headers[@]}"; do
        if [[ "${headers[i]}" == "$column_name" ]]; then
            printf '%s\n' "$i"
            return 0
        fi
    done

    echo "Error: Column '$column_name' not found" >&2
    return 1
}

# Query rows by column value
#   $1 - header name of the column to search
#   $2 - text to look for; a row matches when the column value CONTAINS it
# Prints every matching row as "header: value" lines followed by a summary.
# Returns 1 when the column does not exist.
query_by_column() {
    local column_name="$1"
    local search_value="$2"
    local col_index
    col_index=$(get_column_index "$column_name") || return 1

    echo "Searching for '$search_value' in column '$column_name':"
    echo

    local -a fields
    local row_data i
    local found=0

    for row_data in "${rows[@]}"; do
        _csv_split_row "$row_data" fields

        # "${fields[n]-}" keeps short rows from tripping `set -u`.
        if [[ "${fields[col_index]-}" == *"$search_value"* ]]; then
            found=$((found + 1))
            for i in "${!headers[@]}"; do
                printf "  %-15s: %s\n" "${headers[i]}" "${fields[i]-}"
            done
            echo
        fi
    done

    if [[ $found -eq 0 ]]; then
        echo "  No matching rows found"
    else
        echo "Found $found matching row(s)"
    fi
}

# Pretty-print entire table
# Column widths are the longest header or cell in each column. Rows shorter
# than the widest row are padded with empty cells so the table stays
# rectangular.
print_table() {
    local -a col_widths=()
    local -a fields
    local row_data dashes
    local i
    local num_cols=${#headers[@]}

    # Initialize with header lengths
    for i in "${!headers[@]}"; do
        col_widths[i]=${#headers[i]}
    done

    # Check data rows for max widths (and for rows wider than the header)
    for row_data in "${rows[@]}"; do
        _csv_split_row "$row_data" fields
        if [[ ${#fields[@]} -gt $num_cols ]]; then
            num_cols=${#fields[@]}
        fi
        for i in "${!fields[@]}"; do
            if [[ ${#fields[i]} -gt ${col_widths[i]:-0} ]]; then
                col_widths[i]=${#fields[i]}
            fi
        done
    done

    # Print header
    echo
    for ((i = 0; i < num_cols; i++)); do
        printf '| %-*s ' "${col_widths[i]:-0}" "${headers[i]-}"
    done
    echo "|"

    # Print separator: width + 2 dashes per column (one per padding space)
    for ((i = 0; i < num_cols; i++)); do
        printf -v dashes '%*s' "$((${col_widths[i]:-0} + 2))" ''
        printf '|%s' "${dashes// /-}"
    done
    echo "|"

    # Print data rows
    for row_data in "${rows[@]}"; do
        _csv_split_row "$row_data" fields
        for ((i = 0; i < num_cols; i++)); do
            printf '| %-*s ' "${col_widths[i]:-0}" "${fields[i]-}"
        done
        echo "|"
    done
    echo
}

# Print the values of one column
#   $1 - header name of the column
# Writes a heading line and then one indented value per data row to stdout.
# Returns 1 when the column does not exist.
get_column() {
    local column_name="$1"
    local col_index
    col_index=$(get_column_index "$column_name") || return 1

    local -a fields
    local row_data

    echo "Values in column '$column_name':"
    for row_data in "${rows[@]}"; do
        _csv_split_row "$row_data" fields
        printf '  %s\n' "${fields[col_index]-}"
    done
}
# Create sample CSV file
#   $1 - path of the file to create (an existing file is overwritten)
# Writes a header line plus six data rows; one row uses quoted fields that
# contain commas so the quoted-field handling can be demonstrated.
# Prints a confirmation line to stdout.
create_sample_csv() {
    local file="$1"

    # Quoted delimiter: the sample data is written literally, no expansion.
    cat > "$file" << 'EOF'
Name,Age,City,Email
Alice Smith,30,New York,alice@example.com
Bob Jones,25,Los Angeles,bob@example.com
Charlie Brown,35,Chicago,charlie@example.com
"Diana, Princess",28,"London, UK",diana@example.com
Eve Wilson,42,Boston,eve@example.com
Frank Lee,31,Seattle,frank@example.com
EOF

    echo "Created sample CSV: $file"
}
# Main demo
# Creates a temporary sample CSV, parses it, and exercises the table,
# query and column functions. Prints usage hints at the end.
# Returns 1 if the temporary file cannot be created.
main() {
    echo "=== CSV Parser Demo ==="
    echo

    # Create sample CSV in a private, unpredictable temp file rather than a
    # fixed /tmp name that another user or process could pre-create.
    local csv_file
    csv_file=$(mktemp "${TMPDIR:-/tmp}/sample_data.XXXXXX") || {
        echo "Error: could not create temporary file" >&2
        return 1
    }

    # Remove the file on every exit path, including failures under `set -e`.
    # The path is expanded (shell-quoted) now because the local variable is
    # out of scope by the time the EXIT trap runs.
    local cleanup_cmd
    printf -v cleanup_cmd 'rm -f -- %q' "$csv_file"
    trap "$cleanup_cmd" EXIT

    create_sample_csv "$csv_file"

    echo
    echo "--- Parsing CSV ---"
    parse_csv "$csv_file"

    echo
    echo "--- Pretty Print Table ---"
    print_table

    echo "--- Query by Column ---"
    echo
    query_by_column "City" "Los Angeles"

    echo
    echo "--- Get Column Values ---"
    echo
    get_column "Name"

    echo
    echo "--- Query with Quoted Field ---"
    echo
    query_by_column "Name" "Diana"

    # Cleanup
    rm -f -- "$csv_file"
    trap - EXIT

    echo
    echo "=== Demo Complete ==="
    echo
    echo "Usage:"
    echo "  source $0  # To load functions"
    echo "  parse_csv <file.csv>"
    echo "  print_table"
    echo "  query_by_column <column_name> <search_value>"
    echo "  get_column <column_name>"
}
# Run demo if executed directly
# When the file is sourced, BASH_SOURCE[0] (this file) differs from $0 (the
# sourcing shell or script), so only the function definitions are loaded.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi