# Input audio file: speechdft-16-8-mono-5secs.wav
# Parameters
# NOTE(review): presumably intended for a spectrogram / mel-filterbank step
# that is not visible in this part of the file — confirm against the caller.
n_fft = 1024   # FFT size
hop_len = 512  # hop between successive frames
n_mels = 40    # number of mel bands
# -------------------------------------------------
# 3️⃣ Compute the DFT (via FFT) – only the positive frequencies
# -------------------------------------------------
# Expects `audio_float` to be a 1-D real-valued sample array
# (it is produced by the PCM-to-float conversion step).
N = len(audio_float)                 # number of samples = 5 s × 16 kHz = 80 000
fft_vals = np.fft.rfft(audio_float)  # real‑valued FFT → N/2+1 points
fft_mag = np.abs(fft_vals) / N       # normalise magnitude
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile

# Input audio file: speechdft-16-8-mono-5secs.wav
# Load with librosa (it handles 8‑bit conversion internally)
# The load must run before the MFCC call below, which consumes `y` and `sr_lib`.
y, sr_lib = librosa.load('speechdft-16-8-mono-5secs.wav', sr=16000, mono=True)

# Compute 13 MFCCs (typical default)
mfccs = librosa.feature.mfcc(y=y, sr=sr_lib, n_mfcc=13, n_fft=512, hop_length=256)

# Parameters
# NOTE(review): the MFCC call above uses its own n_fft / hop_length values;
# these are presumably meant for a later analysis step — confirm.
n_fft = 1024
hop_len = 512
# -------------------------------------------------
# 2️⃣ Convert 8‑bit unsigned PCM to float [-1, 1)
# -------------------------------------------------
# 8‑bit PCM in wav files is typically unsigned (0‑255).
# Subtracting the 128 midpoint centres the signal on zero; dividing by 128
# maps 0 → -1.0 and 255 → 127/128, so the result lies in [-1, 1).
# Expects `audio_int` to be the raw integer sample array read from the file.
audio_float = (audio_int.astype(np.float32) - 128) / 128.0

# NOTE(review): truncated fragment left over from the original paste, kept
# here for reference only (it duplicates the librosa load call):
# sr_lib = librosa.load('speechdft-16-8-mono-5secs.wav'
# Read the recording via librosa, requesting a 16 kHz sample rate;
# `y` receives the waveform and `sr` the sample rate returned by the call.
y, sr = librosa.load(
    'speechdft-16-8-mono-5secs.wav',
    sr=16000,
)