#!/bin/bash
#
# Record audio with SoX's `rec`, send the recording to a Whisper inference
# server, then print the transcription and optionally copy it to the
# clipboard (wl-copy) and type it out (wtype).
#
# Usage:
#   <script> [--no-copy] [--no-type] [--server-url URL]
#            [--type-delay SECONDS] [--list-devices] [-h|--help]
#
# Required tools: rec (SoX), curl
# Optional tools: wl-copy, wtype

set -u

# --- Configuration ---
WHISPER_SERVER_URL="http://127.0.0.1:8080/inference"
TEMP_DIR="/dev/shm" # Use RAM-based storage
TYPE_DELAY=0.0      # Delay before typing in seconds

# --- Runtime options (overridable from the command line) ---
NO_COPY=false
NO_TYPE=false
SERVER_URL="$WHISPER_SERVER_URL"

# Set in main() once the temporary directory has been validated.
OUTPUT_PATH=""

#######################################
# Print a message to stderr.
# Arguments: message words
#######################################
err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print an "Error: ..." message to stderr and exit with status 1.
# Arguments: message words
#######################################
die() {
  err "Error: $*"
  exit 1
}

#######################################
# Print command-line help to stdout.
#######################################
usage() {
  cat <<EOF
Usage: ${0##*/} [options]

Options:
  --no-copy             Do not copy the transcription to the clipboard
  --no-type             Do not type out the transcription
  --server-url URL      Whisper server endpoint (default: $WHISPER_SERVER_URL)
  --type-delay SECONDS  Delay before typing (default: $TYPE_DELAY)
  --list-devices        List audio devices and exit
  -h, --help            Show this help and exit
EOF
}

#######################################
# Test whether a command is available on PATH.
# Silent on purpose: the caller decides whether a missing tool is fatal
# (rec, curl) or merely disables a feature (wl-copy, wtype).
# Arguments: $1 - command name
# Returns:   0 if found, non-zero otherwise
#######################################
check_tool() {
  command -v "$1" > /dev/null 2>&1
}

#######################################
# Parse command-line arguments into NO_COPY, NO_TYPE, SERVER_URL and
# TYPE_DELAY. Unknown options are reported and skipped.
# Arguments: the script's positional parameters
#######################################
parse_args() {
  while (($# > 0)); do
    case "$1" in
      --no-copy)
        NO_COPY=true
        shift
        ;;
      --no-type)
        NO_TYPE=true
        shift
        ;;
      --server-url)
        # Without this guard `shift 2` fails (and shifts nothing) when the
        # value is missing, which would spin this loop forever.
        (($# >= 2)) || die "Option '$1' requires a value."
        SERVER_URL=$2
        shift 2
        ;;
      --type-delay)
        (($# >= 2)) || die "Option '$1' requires a value."
        TYPE_DELAY=$2
        shift 2
        ;;
      --list-devices)
        echo "Available audio devices:"
        # NOTE(review): kept as in the original; confirm that `rec -L`
        # really lists devices with the installed SoX build.
        rec -L
        exit 0
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        err "Unknown option: $1"
        shift
        ;;
    esac
  done

  # Validate here so no external calculator is needed later on.
  local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  if [[ ! $TYPE_DELAY =~ $number_re ]]; then
    die "Invalid --type-delay '$TYPE_DELAY' (expected a non-negative number of seconds)."
  fi
}

#######################################
# Record from the default input into OUTPUT_PATH until interrupted, then
# verify that a non-empty file was produced.
# Globals: OUTPUT_PATH (read)
#######################################
record_audio() {
  echo "Recording... Press Ctrl+C to stop."

  # Keep the script alive on Ctrl+C and stop any remaining child processes.
  # stderr is silenced so a missing pkill does not clutter the output.
  trap 'pkill -P $$ 2>/dev/null' INT
  rec -r 16000 -c 1 "$OUTPUT_PATH"
  # Recording is over: Ctrl+C should abort the script again from here on.
  trap - INT

  echo "Recording finished. Saved to $OUTPUT_PATH"

  if [[ ! -f $OUTPUT_PATH ]]; then
    die "Recording failed - output file not found."
  fi
  if [[ ! -s $OUTPUT_PATH ]]; then
    die "Recording file is empty."
  fi
}

#######################################
# Extract the raw (still JSON-escaped) string value(s) stored under a key.
# Tolerates whitespace around the colon and backslash escapes inside the
# value, so an escaped quote does not truncate the match.
# Arguments: $1 - key name (plain word), $2 - JSON text
# Outputs:   one matched value per line on stdout; nothing if no match
#######################################
extract_json_string() {
  local key=$1 json=$2
  local prefix="\"${key}\"[[:space:]]*:[[:space:]]*\""
  local body='([^"\\]|\\.)*'

  printf '%s' "$json" \
    | grep -oE "${prefix}${body}\"" \
    | sed -E "s/^${prefix}//; s/\"\$//"
}

#######################################
# Decode the JSON escapes relevant for dictated text in a single
# left-to-right pass (so "\\n" is a backslash plus 'n', not a newline):
#   \n, \t -> space      \r -> removed      \" \\ \/ -> the literal char
# Any other escape sequence is passed through unchanged.
# Arguments: $1 - escaped string
# Outputs:   decoded string on stdout (no trailing newline added)
#######################################
json_unescape() {
  local raw=$1 out='' ch next
  local i=0 len=${#raw}

  while ((i < len)); do
    ch=${raw:i:1}
    if [[ $ch == '\' ]] && ((i + 1 < len)); then
      next=${raw:i+1:1}
      case "$next" in
        n | t) out+=' ' ;;
        r) ;;
        '"' | '\' | /) out+=$next ;;
        *) out+="\\$next" ;;
      esac
      ((i += 2))
    else
      out+=$ch
      ((i += 1))
    fi
  done

  printf '%s' "$out"
}

#######################################
# Entry point: parse options, record, transcribe, then output the result.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"

  # --- Check for required tools ---
  check_tool "rec" || die "'rec' (SoX) not found. Cannot record audio."
  check_tool "curl" \
    || die "'curl' not found. Cannot send requests to Whisper server."

  # --- Check for optional tools ---
  local wlcopy_available=false wtype_available=false
  if check_tool "wl-copy"; then
    wlcopy_available=true
  fi
  if check_tool "wtype"; then
    wtype_available=true
  fi

  # --- Pick the recording location ---
  # Fall back to the regular temp directory when the configured one is
  # missing or not writable.
  if [[ ! -d $TEMP_DIR || ! -w $TEMP_DIR ]]; then
    TEMP_DIR=${TMPDIR:-/tmp}
  fi
  OUTPUT_PATH="$TEMP_DIR/recorded_audio_$(date +%s).wav"

  # --- Record Audio ---
  record_audio

  # --- Send to Whisper Server ---
  echo "Sending to Whisper server at $SERVER_URL..."
  local response
  # -sS: no progress meter, but still report transport errors on stderr.
  if ! response=$(curl -sS -X POST \
    -F "file=@$OUTPUT_PATH" \
    -F "temperature=0.0" \
    -F "response_format=json" \
    "$SERVER_URL"); then
    die "Failed to communicate with Whisper server."
  fi

  # --- Process the response ---
  local transcription
  transcription=$(extract_json_string "text" "$response")
  # If that fails, try the alternative format
  if [[ -z $transcription ]]; then
    transcription=$(extract_json_string "result" "$response")
  fi
  transcription=$(json_unescape "$transcription")

  if [[ -z $transcription ]]; then
    err "Error: Could not extract transcription from response."
    err "Server response: $response"
    exit 1
  fi

  # --- Output the transcription ---
  # printf instead of echo so text such as "-n" is printed verbatim.
  echo "--- Transcription ---"
  printf '%s\n' "$transcription"
  echo "---------------------"

  # --- Copy to clipboard ---
  if [[ $NO_COPY == false ]]; then
    if [[ $wlcopy_available == true ]]; then
      printf '%s' "$transcription" | wl-copy
      echo "Transcription copied to clipboard."
    else
      echo "wl-copy not found, cannot copy to clipboard."
    fi
  fi

  # --- Type out the transcription ---
  if [[ $NO_TYPE == false ]]; then
    if [[ $wtype_available == true ]]; then
      # TYPE_DELAY was validated as a non-negative number, so it is
      # greater than zero exactly when it contains a non-zero digit.
      if [[ $TYPE_DELAY =~ [1-9] ]]; then
        echo "Waiting $TYPE_DELAY seconds before typing..."
        sleep "$TYPE_DELAY"
      fi
      printf '%s' "$transcription" | wtype -
      echo "Transcription typed out."
    else
      echo "wtype not found, cannot type transcription."
    fi
  fi

  # Optional: Clean up the temporary file
  # rm -f "$OUTPUT_PATH"
}

main "$@"
exit 0