import { isSafari } from "react-device-detect";

/** Names of the frequency bands that the analyzer reports, in iteration order. */
const _freqRanges = ["all", "voice", "ambient1", "ambient2"] as const;

/** Union of the band names listed in `_freqRanges`. */
type FreqRange = (typeof _freqRanges)[number];

/** The band names exposed as a read-only array of `FreqRange` for iteration. */
const freqRanges: readonly FreqRange[] = _freqRanges;

/**
 * Multiplier applied to a band's previous volume on every reading; the
 * previous value scaled by this factor is the floor for the new value.
 */
const averagingFactor = 0.8;

/** One volume value per frequency band. */
export type VolumeAnalyzerResult = Record<FreqRange, number>;

/**
 * Measures the volume of an audio `MediaStream` per frequency band.
 *
 * The stream is routed through a Web Audio `AnalyserNode`; each call to
 * {@link VolumeAnalyzer.getVolume} reads the current spectrum and updates a
 * smoothed root-mean-square level for every band in `freqRanges`.
 * Call {@link VolumeAnalyzer.clear} when the analyzer is no longer needed.
 */
export class VolumeAnalyzer {
  /** Stream being analyzed: the caller's stream, or a clone of it on Safari. */
  private readonly stream: MediaStream;
  /** True when `stream` is a clone owned by this instance (tracks are stopped in `clear`). */
  private readonly streamCloned: boolean;
  private readonly audioCtx: AudioContext;
  private readonly analyzer: AnalyserNode;
  private readonly source: MediaStreamAudioSourceNode;
  /** Reusable buffer that receives one byte per frequency bin. */
  private readonly frequencyData: Uint8Array;
  /** Latest smoothed volume per band; mutated in place by `getVolume`. */
  private readonly volumes: VolumeAnalyzerResult;

  /**
   * @param stream Audio stream to analyze. On Safari the stream is cloned and
   *   the clone's audio tracks are enabled; otherwise the stream is used as-is.
   */
  constructor(stream: MediaStream) {
    // Fall back to the prefixed constructor when the standard one is absent.
    this.audioCtx = new (window.AudioContext || (window as any).webkitAudioContext)(); // eslint-disable-line @typescript-eslint/no-explicit-any
    if (isSafari) {
      // NOTE(review): presumably the clone lets analysis continue even when the
      // caller disables the original tracks — confirm the Safari-specific reason.
      this.streamCloned = true;
      this.stream = stream.clone();
      this.stream.getAudioTracks().forEach((track) => (track.enabled = true));
    } else {
      this.stream = stream;
      this.streamCloned = false;
    }
    this.source = this.audioCtx.createMediaStreamSource(this.stream);
    const analyzer = this.audioCtx.createAnalyser();
    analyzer.minDecibels = -70; // Default: -100
    analyzer.maxDecibels = -40; // Default: -30
    analyzer.fftSize = 1 << 11;
    analyzer.smoothingTimeConstant = 0.5;
    this.source.connect(analyzer);
    this.analyzer = analyzer;

    this.frequencyData = new Uint8Array(this.analyzer.frequencyBinCount);

    this.volumes = {} as VolumeAnalyzerResult;
    freqRanges.forEach((fr) => (this.volumes[fr] = 0));
  }

  /**
   * Releases the audio graph: disconnects the nodes, stops the tracks of the
   * cloned stream (the caller's own stream is never stopped) and closes the
   * audio context. Safe to call more than once.
   */
  public clear(): void {
    this.analyzer.disconnect();
    this.source.disconnect();
    if (this.streamCloned) {
      this.stream.getTracks().forEach((track) => track.stop());
    }
    // close() rejects on an already-closed context, so skip it on repeated
    // calls and make sure a failed close never becomes an unhandled rejection.
    if (this.audioCtx.state !== "closed") {
      this.audioCtx.close().catch(() => {
        // Best-effort teardown: nothing useful can be done if closing fails.
      });
    }
  }

  /**
   * Reads the current spectrum and updates the smoothed volume of each band.
   *
   * Bands by bin frequency: `all` covers every bin, `voice` is 200–1000 Hz,
   * `ambient1` is above 1000 Hz up to 10000 Hz, and `ambient2` is above
   * 10000 Hz up to 20000 Hz. Bins below 200 Hz or above 20000 Hz count
   * towards `all` only.
   *
   * @returns The internal volumes object; the same instance is returned and
   *   mutated on every call.
   */
  public getVolume = (): VolumeAnalyzerResult => {
    this.analyzer.getByteFrequencyData(this.frequencyData);

    const rms = {} as Record<FreqRange, SquareSum>;
    freqRanges.forEach((r) => (rms[r] = { sum: 0, count: 0 }));

    // Width of one frequency bin in Hz.
    const freqStep = this.audioCtx.sampleRate / this.analyzer.fftSize;
    for (let i = 0; i < this.analyzer.frequencyBinCount; i++) {
      const hertz = i * freqStep; // Frequency of this bin [Hz]
      const v = this.frequencyData[i] / 0xff; // Decibel ratio: [0, 1] corresponds to [minDecibels, maxDecibels]

      // Human voice range:
      // @see https://ja.wikipedia.org/wiki/%E9%9F%B3%E5%9F%9F#%E4%BA%BA%E5%A3%B0%E3%81%AE%E9%9F%B3%E5%9F%9F
      rms.all.sum += v * v;
      rms.all.count += 1;
      if (200 <= hertz && hertz <= 1000) {
        rms.voice.sum += v * v;
        rms.voice.count += 1;
      } else if (1000 < hertz && hertz <= 10000) {
        rms.ambient1.sum += v * v;
        rms.ambient1.count += 1;
      } else if (10000 < hertz && hertz <= 20000) {
        rms.ambient2.sum += v * v;
        rms.ambient2.count += 1;
      }
    }

    // Fast attack, slow fadeout: a louder reading is taken immediately, while a
    // quieter one only lets the previous value decay by averagingFactor.
    freqRanges.forEach((fr) => (this.volumes[fr] = Math.max(calcRMS(rms[fr]), this.volumes[fr] * averagingFactor)));

    return this.volumes;
  };
}

/** Running accumulator for a root-mean-square calculation. */
interface SquareSum {
  /** Sum of the squared samples accumulated so far. */
  sum: number;
  /** Number of samples that contributed to `sum`. */
  count: number;
}

/**
 * Computes the root mean square of the samples accumulated in `ss`.
 *
 * @param ss Sum of squares and sample count.
 * @returns `sqrt(sum / count)`, or 0 when no samples were accumulated.
 * @see https://en.wikipedia.org/wiki/Root_mean_square
 */
function calcRMS(ss: SquareSum): number {
  // An empty accumulator would evaluate 0 / 0 = NaN; report silence instead so
  // callers comparing with Math.max are not poisoned by NaN.
  if (ss.count <= 0) {
    return 0;
  }
  return Math.sqrt(ss.sum / ss.count);
}
