[O] Separate dependencies

This commit is contained in:
wuliaozhiji 2022-03-24 23:40:25 -04:00
parent 48226fd7f7
commit efa385bf43
3 changed files with 98 additions and 88 deletions

View File

@ -3,23 +3,24 @@ from __future__ import annotations
import csv
import json
import os
from dataclasses import dataclass
from json import JSONDecodeError
from multiprocessing import Pool
from os import PathLike
from pathlib import Path
from typing import Iterable, Literal, Callable
import jsonpickle as jsonpickle
import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
import parselmouth
import tqdm
import seaborn as sns
import tqdm
from matplotlib.patches import Patch
from spectral_tilt import tilt
from calculations import calculate_tilt, calculate_freq_info, FrequencyStats, calc_col_stats, calculate_freq_statistics, \
Statistics
ASAB = Literal['f', 'm']
COLOR_PINK = '#F5A9B8'
@ -27,31 +28,6 @@ COLOR_BLUE = '#5BCEFA'
CPU_CORES = 36
def calculate_freq_info(audio: parselmouth.Sound, show_plot=False) -> numpy.ndarray:
    """
    Calculate the pitch and formant tracks of a sound

    :param audio: Sound input
    :param show_plot: Show pyplot plot or not
    :return: 2D float32 array with one row per pitch frame (frames are 0.01 s apart): row[0] is pitch
        (fundamental frequency), row[1:4] are formants F1 to F3. Rows of frames without pitch are all NaN.
    """
    pitch_track = audio.to_pitch(0.01)
    pitch_values = pitch_track.selected_array['frequency']
    formant_values = audio.to_formant_burg(0.01)

    # Pitch frame i is located at pitch_track.xs()[i], which need not equal i / 100 seconds.
    # Sampling the formants at the real frame times keeps all four columns of a row time-aligned.
    frame_times = pitch_track.xs()

    # Start from NaN so that frames without pitch need no further handling
    result = numpy.full((len(pitch_values), 4), numpy.nan, dtype='float32')
    for i, (time, pitch) in enumerate(zip(frame_times, pitch_values)):
        if not pitch:
            # A pitch of 0 means no pitch was found for this frame: leave the whole row as NaN
            continue
        result[i, 0] = pitch
        for f in range(1, 4):
            result[i, f] = formant_values.get_value_at_time(f, float(time))

    if show_plot:
        plt.plot(result)
        plt.show()

    return result
def load_vox_celeb_asab_dict(path: PathLike) -> dict[str, ASAB]:
"""
Load voxCeleb 1 or 2's metadata to gather a dictionary mapping id to assigned sex at birth.
@ -119,7 +95,7 @@ def compute_audio_tilt(aud_dir: str):
"""
Compute and save the tilt info of one audio file
"""
spectral_tilt = tilt(parselmouth.Sound(aud_dir))
spectral_tilt = calculate_tilt(parselmouth.Sound(aud_dir))
with open(Path(aud_dir).with_suffix('.json'), 'w', encoding='utf-8') as f:
json.dump({'tilt': spectral_tilt}, f)
@ -146,59 +122,6 @@ def compute_audio_vox_celeb(func: Callable[[str], None]) -> None:
pass
@dataclass
class FrequencyStats:
    """
    Statistics of every column of a frequency data array

    The fields follow the column order of the array: pitch first, then the three formants.
    """
    pitch: Statistics  # Pitch (fundamental frequency)
    f1: Statistics  # Formant F1
    f2: Statistics  # Formant F2
    f3: Statistics  # Formant F3
@dataclass
class Statistics:
    """
    Summary statistics of one data column
    """
    mean: float
    median: float
    q1: float  # First quartile (0.25 quantile)
    q3: float  # Third quartile (0.75 quantile)
    iqr: float  # Interquartile range (q3 - q1)
    min: float
    max: float
    n: int  # Number of values the statistics were computed from
def calc_col_stats(col: np.ndarray) -> Statistics:
    """
    Compute statistics for a data column, ignoring NaN entries

    NaN entries are dropped first: calculate_freq_info writes NaN for frames without pitch, and a single NaN
    would otherwise turn every statistic into NaN.

    :param col: Input column (tested on 1D array)
    :return: Statistics of the non-NaN values, with n being their count. If no value is left, every statistic
        is NaN and n is 0.
    """
    values = np.asarray(col)
    values = values[~np.isnan(values)]

    if values.size == 0:
        # np.min and np.max raise on an empty array, so report "no data" explicitly instead
        nan = float('nan')
        return Statistics(nan, nan, nan, nan, nan, nan, nan, 0)

    q1 = np.quantile(values, 0.25)
    q3 = np.quantile(values, 0.75)
    return Statistics(
        float(np.mean(values)),
        float(np.median(values)),
        float(q1),
        float(q3),
        float(q3 - q1),
        float(np.min(values)),
        float(np.max(values)),
        int(values.size)
    )
def calculate_freq_statistics(arr: np.ndarray) -> FrequencyStats:
    """
    Summarize every column of a frequency data array

    :param arr: n-by-4 Array from calculate_freq_info (columns: pitch, F1, F2, F3)
    :return: FrequencyStats holding one Statistics per column
    """
    # Columns are processed in order 0..3, matching the pitch, f1, f2, f3 fields
    pitch, f1, f2, f3 = (calc_col_stats(arr[:, column]) for column in range(4))
    return FrequencyStats(pitch=pitch, f1=f1, f2=f2, f3=f3)
def combine_id_freq(id_dir: Path):
"""
Combine frequency data of all audio files under one person
@ -267,10 +190,10 @@ def collect_visualize_freq():
stats_list.append((jsonpickle.decode(stats_dir.read_text()), agab[id]))
# Get AFAB and AMAB means
headers = ['Pitch\n(Fundamental\nFrequency)', 'Formant F1', 'Formant F2', 'Formant F3', 'F1 Ratio', 'F2 Ratio', 'F3 Ratio']
f_means = np.array([[t.mean for t in [s.pitch, s.f1, s.f2, s.f3, s.f1ratio, s.f2ratio, s.f3ratio]]
headers = ['Pitch\n(Fundamental\nFrequency)', 'Formant F1', 'Formant F2', 'Formant F3']
f_means = np.array([[t.mean for t in [s.pitch, s.f1, s.f2, s.f3]]
for s, ag in stats_list if ag == 'f'])
m_means = np.array([[t.mean for t in [s.pitch, s.f1, s.f2, s.f3, s.f1ratio, s.f2ratio, s.f3ratio]]
m_means = np.array([[t.mean for t in [s.pitch, s.f1, s.f2, s.f3]]
for s, ag in stats_list if ag == 'm'])
# Plot bar chart

View File

@ -1,7 +1,11 @@
import json
from pathlib import Path
from typing import Literal
from parselmouth import Sound
from scipy.stats import gaussian_kde
from statistics import *
from calculations import calculate_freq_statistics, calculate_freq_info, calculate_tilt
Feature = Literal['pitch', 'f1', 'f2', 'f3', 'tilt']
Gender = Literal['f', 'm']
@ -37,7 +41,7 @@ def load_kde() -> dict[Feature, dict[Gender, gaussian_kde]]:
def calculate_feature_means(audio: Sound) -> dict[Feature, float]:
s = calculate_freq_statistics(calculate_freq_info(audio))
return {'pitch': s.pitch.mean, 'f1': s.f1.mean, 'f2': s.f2.mean, 'f3': s.f3.mean, 'tilt': tilt(audio)}
return {'pitch': s.pitch.mean, 'f1': s.f1.mean, 'f2': s.f2.mean, 'f3': s.f3.mean, 'tilt': calculate_tilt(audio)}
def _calculate_fem_prob(feature: Feature, value: float) -> float:

View File

@ -1,10 +1,14 @@
from __future__ import annotations
import math
from dataclasses import dataclass
import numpy
import numpy as np
import parselmouth
def tilt(sound: parselmouth.Sound) -> float | None:
def calculate_tilt(sound: parselmouth.Sound) -> float | None:
"""
Compute spectral tilt
@ -65,3 +69,82 @@ def tilt(sound: parselmouth.Sound) -> float | None:
sXY = sumXY - ((sumX * sumY) / len(bins))
spectral_tilt = sXY / sXX
return spectral_tilt
def calculate_freq_info(audio: parselmouth.Sound, show_plot=False) -> numpy.ndarray:
    """
    Calculate the pitch and formant tracks of a sound

    :param audio: Sound input
    :param show_plot: Show pyplot plot or not
    :return: 2D float32 array with one row per pitch frame (frames are 0.01 s apart): row[0] is pitch
        (fundamental frequency), row[1:4] are formants F1 to F3. Rows of frames without pitch are all NaN.
    """
    pitch_track = audio.to_pitch(0.01)
    pitch_values = pitch_track.selected_array['frequency']
    formant_values = audio.to_formant_burg(0.01)

    # Pitch frame i is located at pitch_track.xs()[i], which need not equal i / 100 seconds.
    # Sampling the formants at the real frame times keeps all four columns of a row time-aligned.
    frame_times = pitch_track.xs()

    # Start from NaN so that frames without pitch need no further handling
    result = numpy.full((len(pitch_values), 4), numpy.nan, dtype='float32')
    for i, (time, pitch) in enumerate(zip(frame_times, pitch_values)):
        if not pitch:
            # A pitch of 0 means no pitch was found for this frame: leave the whole row as NaN
            continue
        result[i, 0] = pitch
        for f in range(1, 4):
            result[i, f] = formant_values.get_value_at_time(f, float(time))

    if show_plot:
        # Imported lazily so that matplotlib is only needed when a plot is requested
        import matplotlib.pyplot as plt
        plt.plot(result)
        plt.show()

    return result
@dataclass
class FrequencyStats:
    """
    Statistics of every column of a frequency data array

    The fields follow the column order of the array: pitch first, then the three formants.
    """
    pitch: Statistics  # Pitch (fundamental frequency)
    f1: Statistics  # Formant F1
    f2: Statistics  # Formant F2
    f3: Statistics  # Formant F3
@dataclass
class Statistics:
    """
    Summary statistics of one data column
    """
    mean: float
    median: float
    q1: float  # First quartile (0.25 quantile)
    q3: float  # Third quartile (0.75 quantile)
    iqr: float  # Interquartile range (q3 - q1)
    min: float
    max: float
    n: int  # Number of values the statistics were computed from
def calc_col_stats(col: np.ndarray) -> Statistics:
    """
    Compute statistics for a data column, ignoring NaN entries

    NaN entries are dropped first: calculate_freq_info writes NaN for frames without pitch, and a single NaN
    would otherwise turn every statistic into NaN.

    :param col: Input column (tested on 1D array)
    :return: Statistics of the non-NaN values, with n being their count. If no value is left, every statistic
        is NaN and n is 0.
    """
    values = np.asarray(col)
    values = values[~np.isnan(values)]

    if values.size == 0:
        # np.min and np.max raise on an empty array, so report "no data" explicitly instead
        nan = float('nan')
        return Statistics(nan, nan, nan, nan, nan, nan, nan, 0)

    q1 = np.quantile(values, 0.25)
    q3 = np.quantile(values, 0.75)
    return Statistics(
        float(np.mean(values)),
        float(np.median(values)),
        float(q1),
        float(q3),
        float(q3 - q1),
        float(np.min(values)),
        float(np.max(values)),
        int(values.size)
    )
def calculate_freq_statistics(arr: np.ndarray) -> FrequencyStats:
    """
    Summarize every column of a frequency data array

    :param arr: n-by-4 Array from calculate_freq_info (columns: pitch, F1, F2, F3)
    :return: FrequencyStats holding one Statistics per column
    """
    # Columns are processed in order 0..3, matching the pitch, f1, f2, f3 fields
    pitch, f1, f2, f3 = (calc_col_stats(arr[:, column]) for column in range(4))
    return FrequencyStats(pitch=pitch, f1=f1, f2=f2, f3=f3)