Last active
November 15, 2023 22:33
-
-
Save zepadovani/3b4eb0c5575882a58889d186bead578d to your computer and use it in GitHub Desktop.
librosa_normalization_factor_calculation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.signal.windows as sciwins | |
import librosa | |
import matplotlib.pyplot as plt | |
## The idea of this script is to calculate the normalization factor for a given window function | |
## The normalization factor is the reciprocal of the sum of the values of the chosen frame | |
## The normalization factor is first calculated for a very short sinusoidal signal, with the | |
## frequency set in the middle of the spectrum and amplitude set to 1 | |
## From this, the normalization factor is calculated for a given window function and is then applied | |
## It is important to note that the normalization factor is calculated for analysis and spectral processing | |
## purposes, and is not the same as the normalization factor used for synthesis purposes as librosa.isftf | |
## has its own normalization factor built in. | |
## Also important to note that the normalization factor is calculated for a specific frame, and is not dealing | |
## with consecutive/overlapping frames. | |
## this works well for hanning, but other windows are not normalized | |
## have also tried just 2/np.sum(window), but it is not right for some types of windows | |
# def normfactor(window): | |
# N = len(window) | |
# norm_factor = (1 /(np.sum(window) / (N-1))) | |
# return norm_factor | |
def calculate_normalization_factor(window, sr=48000, n_fft=2048, normtype='max', reciprocal=False): | |
""" | |
Calculates the normalization factor for a given window function. | |
Args: | |
window (string or tuple): The window function to use for the STFT. | |
sr (int): The sampling rate of the signal. | |
n_fft (int): The number of FFT points to use for the STFT. | |
normtype (string): The type of normalization to use. Can be 'max' or 'sum'. | |
If 'max', the normalization factor is the reciprocal of the maximum value of the chosen frame. | |
If 'sum', the normalization factor is the reciprocal of the sum of the values of the chosen frame. | |
reciprocal (bool): Whether to return the reciprocal of the normalization factor. | |
Returns: | |
float: The normalization factor. | |
""" | |
# Generate a test sinusoidal signal/ | |
dt = 1/sr | |
t = np.arange(0, dt*n_fft*5, dt) | |
frequency = librosa.fft_frequencies(sr=sr, n_fft=n_fft)[int(n_fft/4)] | |
signal = 1 * np.cos(2 * np.pi * frequency * t) ## normalizing according to a sine, amp1, in the first bin | |
# Calculate STFT and magnitudes | |
stft_matrix = librosa.stft(signal, n_fft=n_fft, hop_length=n_fft, window=window) | |
magnitudes = np.abs(stft_matrix) | |
# Choose a frame for normalization | |
frame_to_get_normfact = magnitudes[:, 2] | |
# Calculate the normalization factor | |
if(normtype == 'max'): | |
normalization_factor = 1 / np.max(frame_to_get_normfact) | |
elif(normtype == 'sum'): | |
normalization_factor = 1 / np.sum(frame_to_get_normfact) | |
if(reciprocal): | |
normalization_factor = 1 / normalization_factor | |
return normalization_factor | |
def generate_test_sinusoid(frequency, sr=48000, amp=1, phase=0, duration=3): | |
""" | |
Generate a sinusoidal waveform with the specified frequency, sampling rate, amplitude, phase, and duration. | |
Args: | |
frequency (float): The frequency of the sinusoid in Hz. | |
sr (int, optional): The sampling rate of the sinusoid in Hz. Defaults to 48000. | |
amp (float, optional): The amplitude of the sinusoid. Defaults to 1. | |
phase (float, optional): The phase of the sinusoid in radians. Defaults to 0. | |
duration (float, optional): The duration of the sinusoid in seconds. Defaults to 3. | |
Returns: | |
numpy.ndarray: A 1D numpy array containing the generated sinusoid. | |
""" | |
t = np.arange(0, duration, 1/sr) | |
sinusoid = amp * np.cos(2 * np.pi * frequency * t + phase) | |
return sinusoid | |
def raw_magnitude_stft(array, window, n_fft=2048, overlap=4): | |
""" | |
Compute the raw magnitude STFT of an audio signal. | |
Parameters: | |
----------- | |
array : np.ndarray | |
Audio signal to compute the STFT of. | |
window : str or tuple | |
Window function to use for the STFT. | |
n_fft : int, optional | |
Number of FFT points to use. Defaults to 2048. | |
overlap : int, optional | |
Number of points of overlap between consecutive frames. Defaults to 4. | |
Returns: | |
-------- | |
magnitudes : np.ndarray | |
Magnitude STFT of the input signal. | |
phases : np.ndarray | |
Phase STFT of the input signal. | |
""" | |
hop = int(n_fft / overlap) | |
stft_matrix = librosa.stft(array, n_fft=n_fft, hop_length=hop, window=window) | |
magnitudes = np.abs(stft_matrix) | |
phases = np.angle(stft_matrix) | |
return magnitudes, phases | |
# Parameters | |
sr = 48000 | |
n_fft = 512 | |
overlaps = 2 | |
hop = int(n_fft / overlaps) | |
# Generate test signal | |
sinfreqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft) | |
sine_frequency = sinfreqs[10] | |
test_signal = generate_test_sinusoid(sine_frequency, amp=0.65) | |
# Calculate STFT magnitudes | |
# win = sciwins.hann(n_fft, sym=False) | |
win = sciwins.blackmanharris(n_fft, sym=False) | |
mags, phases = raw_magnitude_stft(test_signal, win, n_fft=n_fft, overlap=overlaps) | |
# Calculate normalization factor | |
normfact = calculate_normalization_factor(win, sr=sr, n_fft=n_fft, normtype='sum') | |
# Apply normalization factor to a specific frame | |
norm_frame = mags[:, 10] * normfact | |
# Print and plot results | |
print("Normalization Factor:", normfact) | |
print("Sum after normalization:", np.sum(norm_frame)) | |
plt.plot(norm_frame) | |
plt.title('Normalized Frame') | |
plt.xlabel('Frequency Bin') | |
plt.ylabel('Magnitude') | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment