Procházet zdrojové kódy

Add a shellscript to automatically transcribe recordings

Samuel W. Flint před 1 rokem
rodič
revize
ed8945e013
1 změnil soubory, kde provedl 34 přidání a 0 odebrání
  1. 34 0
      transcribe

+ 34 - 0
transcribe

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+#
+# Transcribe the .wav and .mp3 recordings found under RECORDER_DIR with
+# whisper, writing text transcripts to the matching relative location
+# under OUTPUT_DIR.
+#
+# Usage: transcribe RECORDER_DIR OUTPUT_DIR
+# Environment: TRANSCRIBE_MODEL selects the whisper model, default turbo.
+
+export TRANSCRIBE_MODEL=${TRANSCRIBE_MODEL:-turbo}
+
+
+if [[ $# -lt 2 ]] ; then
+    # Quote $0 so a script path containing spaces is not word-split.
+    echo "$(basename -- "$0") RECORDER_DIR OUTPUT_DIR" >&2
+    exit 1
+fi
+
+# Exported so the bash process spawned for each file can read them.
+export TRANSCRIBE_DIR=$1
+export TRANSCRIBE_OUT=$2
+set -e
+
+# Transcribe one recording with whisper and delete it once transcribed.
+# Globals:   TRANSCRIBE_DIR, TRANSCRIBE_OUT, TRANSCRIBE_MODEL, all read
+# Arguments: $1 - path of the recording, located under TRANSCRIBE_DIR
+# Outputs:   a txt transcript in the directory under TRANSCRIBE_OUT that
+#            mirrors the location of the recording under TRANSCRIBE_DIR
+# Returns:   non-zero as soon as any step fails, leaving the recording intact
+function run_transcription {
+    local inpath="${1}"
+    local rel_path out_dir_path
+    # find runs this function in a separate `bash -c` process where the
+    # parent script's `set -e` is not in effect, so every step is checked
+    # explicitly; otherwise a failed whisper run would still reach the rm
+    # below and destroy the recording without producing a transcript.
+    #
+    # The nested dirname is quoted so directories with spaces stay one word.
+    rel_path=$(realpath --relative-to "${TRANSCRIBE_DIR}" -- "$(dirname -- "${inpath}")") || return
+    out_dir_path="${TRANSCRIBE_OUT}/${rel_path}"
+    set -x
+    mkdir -p "${out_dir_path}" || return
+    whisper --model "${TRANSCRIBE_MODEL}" \
+            --output_dir "${out_dir_path}" \
+            --output_format txt \
+            --task transcribe \
+            --language en \
+            "${inpath}" || return
+    # Only reached when whisper succeeded.
+    rm -- "${inpath}"
+}
+
+# Make the function visible to the child bash started by find.
+export -f run_transcription
+
+# Run the function once per .wav or .mp3 regular file or symlink. The path is
+# handed over as positional argument 1, with a fixed name in slot 0, rather
+# than being spliced into the command string.
+find "${TRANSCRIBE_DIR}" -type l,f \
+     \( -name '*.wav' -o -name '*.mp3' \) \
+     -exec bash -c 'run_transcription "$1"' bash {} \;