diff --git a/generate-demuxer.ts b/generate-demuxer.ts
index d50facf..7e153aa 100644
--- a/generate-demuxer.ts
+++ b/generate-demuxer.ts
@@ -1,65 +1,88 @@
-const graph_density = 1000;
+import findLoudness, { SwapPoint } from "./find-loudness";
+
+const graph_density = 8000;
 const threshold_at_point = 1;
-const inertia_s = 0.1;
+const inertia_s = 0.3;
 const inertia_samples = inertia_s * graph_density;
 
-let position = 0;
-let last_swap_position = 0;
-
-const s = (n: number) => Math.round(n / graph_density);
-const minutes = (units: number) => Math.floor(s(units) / 60);
+const s = (n: number) => n / graph_density;
+// minutes within the hour, now that hours are displayed alongside
+const minutes = (units: number) => Math.floor(s(units) / 60) % 60;
+const hours = (units: number) => Math.floor(units / graph_density / 60 / 60);
+
 const formatTime = (units: number) =>
-  `${minutes(units)}:${Math.floor(s(units) % 60)}`;
+  `${hours(units)}:${minutes(units)}:${Math.floor(s(units) % 60)}`;
 
-let keep_loud_until = 0;
+type Mode = { left: boolean; right: boolean };
 
-let total_speaking = 0;
+async function run() {
+  const [left_breaks, right_breaks] = await Promise.all([
+    findLoudness("/tmp/leftraw", threshold_at_point, inertia_samples, "left"),
+    findLoudness("/tmp/rightraw", threshold_at_point, inertia_samples, "right"),
+  ]);
 
-const results: [string, number][] = [];
+  const merged = [...left_breaks, ...right_breaks].sort(
+    (a, b) => a.position_start - b.position_start
+  );
 
-process.stdin.on("readable", () => {
-  let chunk: Buffer | null;
-  let was_loud_last_time = false;
-  while ((chunk = process.stdin.read()) !== null) {
-    for (let i = 0; i < chunk.byteLength; i++) {
-      position++;
-      const byte = chunk[i];
-      const volume = Math.abs(byte - 128);
-      const is_loud: boolean =
-        volume > threshold_at_point || position < keep_loud_until;
-      if (is_loud) {
-        total_speaking++;
-      }
-      if (is_loud != was_loud_last_time) {
-        results.push([
-          is_loud ? "silence" : "speaking",
-          position - last_swap_position,
-        ]);
-        last_swap_position = position;
-        was_loud_last_time = is_loud;
-      }
-      if (volume > threshold_at_point) {
-        keep_loud_until = position + inertia_samples;
+  // console.log("left breaks:", left_breaks);
+  // console.log(`right_breaks`, right_breaks);
+  // console.log(`merged`, merged);
+
+  // A SwapPoint flips one channel's loudness; Mode tracks both channels.
+  function new_mode(m: Mode, s: SwapPoint): Mode {
+    return { ...m, [s.label]: s.loud };
+  }
+
+  // Picks which picture should be on screen for the current Mode.
+  function mode_to_string(mode: Mode) {
+    if (mode.left && mode.right) {
+      return "both";
+    }
+    for (const side of ["left", "right"]) {
+      if (mode[side as keyof Mode]) {
+        return side;
       }
     }
+    return "none";
   }
-});
 
-const mode_to_image = (mode: string) => {
-  return mode === "silence" ? "pics/cisza.png" : "pics/kuba.png";
-};
-
-process.stdin.on("end", () => {
-  results.forEach(([mode, duration_units]) => {
-    console.log("file", `'${mode_to_image(mode)}'`);
-    console.log("duration", (duration_units / graph_density).toFixed(4));
-  });
-
-  console.log("file", `'${mode_to_image(results[results.length - 1][0])}'`);
-
-  // console.log(formatTime(total_speaking), formatTime(position));
-});
+  console.log("file", `${process.cwd()}/pics/none.png`);
+  let last_point = 0;
+  let mode: Mode = { left: false, right: false };
+  let last_file: string | undefined;
+  let total = 0;
+  for (let i = 2; i < merged.length; i++) {
+    const point = merged[i];
+    mode = new_mode(mode, point);
+    const file = `${process.cwd()}/pics/${mode_to_string(mode)}.png`;
+    const duration = (point.position_start - last_point) / graph_density;
+    console.log("duration", duration);
+    console.log("file", file);
+    last_point = point.position_start;
+    last_file = file;
+    total += duration * graph_density;
+  }
+  console.log("duration", merged[merged.length - 1].duration / graph_density);
+  console.log("file", last_file);
+  console.error(total, formatTime(total));
+}
+run();
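The new version delegates the sample-scanning to `./find-loudness`, which is not part of this diff. For review context, here is the interface the call sites above imply, written out as a declaration sketch — the field and parameter names come straight from the usage in `generate-demuxer.ts`; everything else is an assumption, not the actual module:

```ts
// Sketch inferred from the call sites in generate-demuxer.ts; the real
// ./find-loudness module is not in this diff, so treat details as assumptions.
export type SwapPoint = {
  label: "left" | "right"; // which channel flipped state
  loud: boolean; // the channel's new state
  position_start: number; // sample index where the new state begins
  duration: number; // length of the segment that just ended, in samples
};

// Presumably scans a raw pcm_u8 file and emits one SwapPoint per loud/quiet
// transition, applying the threshold + inertia smoothing that previously
// lived inline in generate-demuxer.ts.
export default function findLoudness(
  path: string,
  threshold: number,
  inertia_samples: number,
  label: "left" | "right"
): Promise<SwapPoint[]>;
```

One thing worth confirming against the real module: the main loop starts at `i = 2`, which looks like it skips an initial SwapPoint emitted at position 0 by each channel — consistent with the hand-printed `none.png` first frame, but easy to break if `findLoudness` ever changes what it emits first.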
"pics/cisza.png" : "pics/kuba.png"; -}; - -process.stdin.on("end", () => { - results.forEach(([mode, duration_units]) => { - console.log("file", `'${mode_to_image(mode)}'`); - console.log("duration", (duration_units / graph_density).toFixed(4)); - }); - - console.log("file", `'${mode_to_image(results[results.length - 1][0])}'`); - - // console.log(formatTime(total_speaking), formatTime(position)); -}); + console.log("file", `${process.cwd()}/pics/none.png`); + let last_point = 0; + let mode: Mode = { left: false, right: false }; + let last_file; + let total = 0; + for (let i = 2; i < merged.length; i++) { + const point = merged[i]; + mode = new_mode(mode, point); + const file = `${process.cwd()}/pics/${mode_to_string(mode)}.png`; + const duration = (point.position_start - last_point) / graph_density; + console.log( + "duration", + (point.position_start - last_point) / graph_density + ); + console.log("file", file); + last_point = point.position_start; + last_file = file; + total += duration * graph_density; + } + console.log("duration", merged[merged.length - 1].duration / graph_density); + console.log("file", last_file); + console.error(total, formatTime(total)); +} +run(); diff --git a/generate.sh b/generate.sh index f08dcfb..ceabba2 100755 --- a/generate.sh +++ b/generate.sh @@ -11,25 +11,27 @@ input=/home/kuba/Downloads/podcast-01-after-effects.mp3 # tutaj dajemy ścieżk aresample=8000 # to bez zmian echo dzielimy mp3 na dwa osobne wav -ffmpeg -i $input -map_channel 0.0.0 /tmp/left.wav -map_channel 0.0.1 /tmp/right.wav +#ffmpeg -i $input -map_channel 0.0.0 /tmp/left.wav -map_channel 0.0.1 /tmp/right.wav echo na dwóch wątkach generujemy surowe pliki -ffmpeg -i /tmp/left.wav -ac 1 -filter:a aresample=$aresample -map 0:a -c:a pcm_u8 -f data - > /tmp/leftraw & -ffmpeg -i /tmp/right.wav -ac 1 -filter:a aresample=$aresample -map 0:a -c:a pcm_u8 -f data - > /tmp/rightraw & +#ffmpeg -i /tmp/left.wav -ac 1 -filter:a aresample=$aresample -map 0:a -c:a pcm_u8 -f data - > /tmp/leftraw & +#ffmpeg -i /tmp/right.wav -ac 1 -filter:a aresample=$aresample -map 0:a -c:a pcm_u8 -f data - > /tmp/rightraw & # czekamy aż obydwa wątki się zakończą -wait; +#wait; echo "generating the demuxers..."; # generuje ścieżki do złożenia przez ffmpega: -ts-node generate-demuxer.ts > /tmp/demuxer.txt +ts-node generate-demuxer.ts > out/demuxer.txt -mkdir -f out +mkdir -p out # używa demuxer.txt żeby skleić końcowe video z dźwiękiem: echo generowanie całości -ffmpeg -y -f concat -i /tmp/demuxer.txt -r 30 -tune stillimage -vsync vfr -pix_fmt yuv420p out/video.mp4 +ffmpeg -y -f concat -safe 0 -i out/demuxer.txt -r 30 -tune stillimage -vsync vfr -pix_fmt yuv420p out/video.mp4 +# ^ daję safe 0 aby przyjmowało bezwzględne ścieżki + echo łączenie video z dźwiękiem: -ffmpeg -i video.mp4 -i $input -ac 1 -tune stillimage out/video-and-audio.mp4 +ffmpeg -i out/video.mp4 -i $input -ac 1 -tune stillimage out/video-and-audio.mp4