init

2026-02-16 15:50:16 +03:00
commit afb81b8278
13816 changed files with 3689732 additions and 0 deletions
--- a/Telegram/SourceFiles/media/audio/media_audio.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio.cpp
--- a/Telegram/SourceFiles/media/audio/media_audio.h
+++ b/Telegram/SourceFiles/media/audio/media_audio.h
@@ -0,0 +1,424 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "ui/effects/animation_value.h"
+#include "core/file_location.h"
+#include "data/data_audio_msg_id.h"
+#include "base/bytes.h"
+#include "base/timer.h"
+
+#include <QtCore/QTimer>
+
+namespace Ui {
+struct PreparedFileInformation;
+} // namespace Ui
+
+namespace Media {
+struct ExternalSoundData;
+struct ExternalSoundPart;
+} // namespace Media
+
+namespace Media {
+namespace Streaming {
+struct TimePoint;
+} // namespace Streaming
+} // namespace Media
+
+namespace Webrtc {
+struct DeviceResolvedId;
+} // namespace Webrtc
+
+namespace Media {
+namespace Audio {
+
+class Instance;
+
+// Thread: Main.
+void Start(not_null<Instance*> instance);
+void Finish(not_null<Instance*> instance);
+
+// Thread: Main. Locks: AudioMutex.
+bool IsAttachedToDevice();
+
+// Thread: Any. Must be locked: AudioMutex.
+bool AttachToDevice();
+
+// Thread: Any.
+void ScheduleDetachFromDeviceSafe();
+void ScheduleDetachIfNotUsedSafe();
+void StopDetachIfNotUsedSafe();
+bool SupportsSpeedControl();
+
+} // namespace Audio
+
+namespace Player {
+
+constexpr auto kDefaultFrequency = 48000; // 48 kHz
+constexpr auto kTogetherLimit = 4;
+constexpr auto kWaveformSamplesCount = 100;
+
+class Fader;
+class Loaders;
+
+[[nodiscard]] rpl::producer<AudioMsgId> Updated();
+
+float64 ComputeVolume(AudioMsgId::Type type);
+
+// Playback state of a single track.
+// The helper predicates declared right below (IsStopped, IsPaused,
+// IsFading, ...) group these values into categories.
+enum class State {
+	// Values for which IsStopped() returns true.
+	Stopped = 0x01,
+	StoppedAtEnd = 0x02,
+	StoppedAtError = 0x03,
+	StoppedAtStart = 0x04,
+
+	// Starting, Stopping, Pausing and Resuming are reported by IsFading().
+	Starting = 0x08,
+	Playing = 0x10,
+	Stopping = 0x18,
+	Pausing = 0x20,
+	// Paused and PausedAtEnd are reported by IsPaused().
+	Paused = 0x28,
+	PausedAtEnd = 0x30,
+	Resuming = 0x38,
+};
+
+// Tells whether |state| is one of the four stopped states.
+inline bool IsStopped(State state) {
+	switch (state) {
+	case State::Stopped:
+	case State::StoppedAtEnd:
+	case State::StoppedAtError:
+	case State::StoppedAtStart:
+		return true;
+	default:
+		return false;
+	}
+}
+
+// True for every stopped state and for the Stopping state as well.
+inline bool IsStoppedOrStopping(State state) {
+	if (state == State::Stopping) {
+		return true;
+	}
+	return IsStopped(state);
+}
+
+// True only for State::StoppedAtEnd.
+inline bool IsStoppedAtEnd(State state) {
+	const auto atEnd = (state == State::StoppedAtEnd);
+	return atEnd;
+}
+
+// Tells whether |state| is Paused or PausedAtEnd.
+inline bool IsPaused(State state) {
+	switch (state) {
+	case State::Paused:
+	case State::PausedAtEnd:
+		return true;
+	default:
+		return false;
+	}
+}
+
+// True for both paused states and for the Pausing state as well.
+inline bool IsPausedOrPausing(State state) {
+	if (state == State::Pausing) {
+		return true;
+	}
+	return IsPaused(state);
+}
+
+// Tells whether |state| is one of the transitional states:
+// Starting, Stopping, Pausing or Resuming.
+inline bool IsFading(State state) {
+	switch (state) {
+	case State::Starting:
+	case State::Stopping:
+	case State::Pausing:
+	case State::Resuming:
+		return true;
+	default:
+		return false;
+	}
+}
+
+// A state is active when it is neither stopped nor paused.
+inline bool IsActive(State state) {
+	return !(IsStopped(state) || IsPaused(state));
+}
+
+// True unless the track is stopped / stopping or paused / pausing.
+inline bool ShowPauseIcon(State state) {
+	if (IsStoppedOrStopping(state)) {
+		return false;
+	}
+	return !IsPausedOrPausing(state);
+}
+
+// Snapshot of a track's playback progress, returned by
+// Mixer::currentState() and stored inside every Mixer::Track.
+struct TrackState {
+	AudioMsgId id;
+	State state = State::Stopped;
+	// NOTE(review): position / receivedTill / length are presumably
+	// measured in samples at |frequency| - confirm against the mixer code.
+	int64 position = 0;
+	int64 receivedTill = 0;
+	int64 length = 0;
+	int frequency = kDefaultFrequency;
+	int fileHeaderSize = 0;
+	bool waitingForData = false;
+};
+
+// QObject exposing the playback controls (play / pause / resume / stop)
+// for tracks identified by AudioMsgId.
+//
+// It stores kTogetherLimit tracks for each of the "audio" and "song"
+// slots plus one video track, and works together with the Fader and
+// Loaders helpers (both are friends) and their two QThread members.
+class Mixer final : public QObject {
+	Q_OBJECT
+
+public:
+	explicit Mixer(not_null<Audio::Instance*> instance);
+
+	void play(
+		const AudioMsgId &audio,
+		std::unique_ptr<ExternalSoundData> externalData,
+		crl::time positionMs);
+	void pause(const AudioMsgId &audio, bool fast = false);
+	void resume(const AudioMsgId &audio, bool fast = false);
+	void stop(const AudioMsgId &audio);
+	void stop(const AudioMsgId &audio, State state);
+
+	// External player audio stream interface.
+	void feedFromExternal(ExternalSoundPart &&part);
+	void forceToBufferExternal(const AudioMsgId &audioId);
+
+	// Thread: Main. Locks: AudioMutex.
+	void setSpeedFromExternal(const AudioMsgId &audioId, float64 speed);
+
+	Streaming::TimePoint getExternalSyncTimePoint(
+		const AudioMsgId &audio) const;
+	crl::time getExternalCorrectedTime(
+		const AudioMsgId &id,
+		crl::time frameMs,
+		crl::time systemMs);
+
+	void stopAndClear();
+
+	TrackState currentState(AudioMsgId::Type type);
+
+	// Thread: Main. Must be locked: AudioMutex.
+	void prepareToCloseDevice();
+
+	// Thread: Main. Must be locked: AudioMutex.
+	void reattachIfNeeded();
+
+	// Thread: Any. Must be locked: AudioMutex.
+	void reattachTracks();
+
+	// Thread: Any.
+	void setSongVolume(float64 volume);
+	float64 getSongVolume() const;
+	void setVideoVolume(float64 volume);
+	float64 getVideoVolume() const;
+
+	void scheduleFaderCallback();
+
+	~Mixer();
+
+private Q_SLOTS:
+	void onError(const AudioMsgId &audio);
+	void onStopped(const AudioMsgId &audio);
+
+	void onUpdated(const AudioMsgId &audio);
+
+Q_SIGNALS:
+	void updated(const AudioMsgId &audio);
+	void stoppedOnError(const AudioMsgId &audio);
+	void loaderOnStart(const AudioMsgId &audio, qint64 positionMs);
+	void loaderOnCancel(const AudioMsgId &audio);
+
+	void suppressSong();
+	void unsuppressSong();
+	void suppressAll(qint64 duration);
+
+private:
+	// Per-track data: the public TrackState, the source (file / bytes /
+	// external data), the speed dependent buffering values and the
+	// ids of the playback stream (one source and kBuffersCount buffers).
+	class Track {
+	public:
+		static constexpr int kBuffersCount = 3;
+
+		// Thread: Any. Must be locked: AudioMutex.
+		void reattach(AudioMsgId::Type type);
+
+		// Thread: Main. Must be locked: AudioMutex.
+		void detach();
+		void clear();
+
+		void started();
+
+		bool isStreamCreated() const;
+		void ensureStreamCreated(AudioMsgId::Type type);
+
+		int getNotQueuedBufferIndex();
+
+		// Thread: Main. Must be locked: AudioMutex.
+		void setExternalData(std::unique_ptr<ExternalSoundData> data);
+
+		void updateStatePosition();
+		void updateWithSpeedPosition();
+
+		[[nodiscard]] static int64 SpeedIndependentPosition(
+			int64 position,
+			float64 speed);
+		[[nodiscard]] static int64 SpeedDependentPosition(
+			int64 position,
+			float64 speed);
+
+		~Track();
+
+		TrackState state;
+
+		Core::FileLocation file;
+		QByteArray data;
+
+		int format = 0;
+		bool loading = false;
+		bool loaded = false;
+		bool waitingForBuffer = false;
+
+		// Speed dependent values.
+		float64 speed = 1.;
+		float64 nextSpeed = 1.;
+		struct WithSpeed {
+			int64 fineTunedPosition = 0;
+			int64 position = 0;
+			int64 length = 0;
+			int64 bufferedPosition = 0;
+			int64 bufferedLength = 0;
+			int64 fadeStartPosition = 0;
+			int samples[kBuffersCount] = { 0 };
+			QByteArray buffered[kBuffersCount];
+		};
+		WithSpeed withSpeed;
+
+		struct Stream {
+			uint32 source = 0;
+			uint32 buffers[kBuffersCount] = { 0 };
+		};
+		Stream stream;
+
+		std::unique_ptr<ExternalSoundData> externalData;
+
+		crl::time lastUpdateWhen = 0;
+		crl::time lastUpdatePosition = 0;
+
+	private:
+		void createStream(AudioMsgId::Type type);
+		void destroyStream();
+		void resetStream();
+
+	};
+
+	bool fadedStop(AudioMsgId::Type type, bool *fadedStart = 0);
+	void resetFadeStartPosition(AudioMsgId::Type type, int positionInBuffered = -1);
+	bool checkCurrentALError(AudioMsgId::Type type);
+
+	void externalSoundProgress(const AudioMsgId &audio);
+
+	// Thread: Any. Must be locked: AudioMutex.
+	void setStoppedState(Track *current, State state = State::Stopped);
+
+	Track *trackForType(AudioMsgId::Type type, int index = -1); // -1 uses currentIndex(type)
+	const Track *trackForType(AudioMsgId::Type type, int index = -1) const;
+	int *currentIndex(AudioMsgId::Type type);
+	const int *currentIndex(AudioMsgId::Type type) const;
+
+	const not_null<Audio::Instance*> _instance;
+
+	// Index of the current track inside the matching array below.
+	int _audioCurrent = 0;
+	Track _audioTracks[kTogetherLimit];
+
+	int _songCurrent = 0;
+	Track _songTracks[kTogetherLimit];
+
+	Track _videoTrack;
+
+	// Volumes are kept in atomics because the volume setters / getters
+	// are documented above as callable from any thread.
+	QAtomicInt _volumeVideo;
+	QAtomicInt _volumeSong;
+
+	friend class Fader;
+	friend class Loaders;
+
+	QThread _faderThread, _loaderThread;
+	Fader *_fader;
+	Loaders *_loader;
+
+	rpl::lifetime _lifetime;
+
+};
+
+Mixer *mixer();
+
+// QObject constructed with a QThread. It declares slots for timer ticks
+// and for the "suppress" requests, and signals that report errors,
+// position updates, stops and preload requests for a track.
+class Fader : public QObject {
+	Q_OBJECT
+
+public:
+	Fader(QThread *thread);
+
+	void songVolumeChanged();
+	void videoVolumeChanged();
+
+Q_SIGNALS:
+	void error(const AudioMsgId &audio);
+	void playPositionUpdated(const AudioMsgId &audio);
+	void audioStopped(const AudioMsgId &audio);
+	void needToPreload(const AudioMsgId &audio);
+
+public Q_SLOTS:
+	void onInit();
+	void onTimer();
+
+	void onSuppressSong();
+	void onUnsuppressSong();
+	void onSuppressAll(qint64 duration);
+
+private:
+	// Bit flags; each name matches one of the signals declared above.
+	// NOTE(review): presumably combined into the int32 returned by
+	// updateOnePlayback() - confirm in the implementation.
+	enum {
+		EmitError = 0x01,
+		EmitStopped = 0x02,
+		EmitPositionUpdated = 0x04,
+		EmitNeedToPreload = 0x08,
+	};
+	int32 updateOnePlayback(Mixer::Track *track, bool &hasPlaying, bool &hasFading, float64 volumeMultiplier, bool volumeChanged);
+	void setStoppedState(Mixer::Track *track, State state = State::Stopped);
+
+	QTimer _timer;
+
+	bool _volumeChangedSong = false;
+	bool _volumeChangedVideo = false;
+
+	// State of the "suppress all" / "suppress song" volume animations.
+	bool _suppressAll = false;
+	bool _suppressAllAnim = false;
+	bool _suppressSong = false;
+	bool _suppressSongAnim = false;
+	anim::value _suppressVolumeAll;
+	anim::value _suppressVolumeSong;
+	crl::time _suppressAllStart = 0;
+	crl::time _suppressAllEnd = 0;
+	crl::time _suppressSongStart = 0;
+
+};
+
+[[nodiscard]] Ui::PreparedFileInformation PrepareForSending(
+	const QString &fname,
+	const QByteArray &data);
+
+namespace internal {
+
+// Thread: Any. Must be locked: AudioMutex.
+bool CheckAudioDeviceConnected();
+
+// Thread: Main. Locks: AudioMutex.
+void DetachFromDevice(not_null<Audio::Instance*> instance);
+bool DetachIfDeviceChanged(
+	not_null<Audio::Instance*> instance,
+	const Webrtc::DeviceResolvedId &nowDeviceId);
+
+// Thread: Any.
+QMutex *audioPlayerMutex();
+
+// Thread: Any.
+bool audioCheckError();
+
+} // namespace internal
+
+} // namespace Player
+} // namespace Media
+
+VoiceWaveform audioCountWaveform(const Core::FileLocation &file, const QByteArray &data);
+
+namespace Media {
+namespace Audio {
+
+// Converts an unsigned 8-bit sample into a 16-bit magnitude: the value
+// is re-centered around 0x80, scaled by 0x100 and its absolute value
+// is returned.
+TG_FORCE_INLINE uint16 ReadOneSample(uchar data) {
+	const auto centered = static_cast<int16>(data) - 0x80;
+	const auto scaled = centered * 0x100;
+	return qAbs(scaled);
+}
+
+// Returns the absolute value of a signed 16-bit sample.
+TG_FORCE_INLINE uint16 ReadOneSample(int16 data) {
+	return qAbs(data);
+}
+
+// Interprets |bytes| as a sequence of SampleType values and passes the
+// ReadOneSample() result of each of them to |callback|, in order.
+// Trailing bytes that do not form a whole sample are ignored.
+template <typename SampleType, typename Callback>
+void IterateSamples(bytes::const_span bytes, Callback &&callback) {
+	const auto count = bytes.size() / sizeof(SampleType);
+	const auto first = reinterpret_cast<const SampleType*>(bytes.data());
+	for (const auto sample : gsl::make_span(first, count)) {
+		callback(ReadOneSample(sample));
+	}
+}
+
+} // namespace Audio
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_capture.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_capture.cpp
@@ -0,0 +1,859 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_capture.h"
+
+#include "media/audio/media_audio_capture_common.h"
+#include "media/audio/media_audio_ffmpeg_loader.h"
+#include "media/audio/media_audio_track.h"
+#include "ffmpeg/ffmpeg_utility.h"
+#include "base/timer.h"
+
+#include <al.h>
+#include <alc.h>
+
+#include <numeric>
+
+namespace Media {
+namespace Capture {
+namespace {
+
+constexpr auto kCaptureFrequency = Player::kDefaultFrequency;
+constexpr auto kCaptureSkipDuration = crl::time(400);
+constexpr auto kCaptureFadeInDuration = crl::time(300);
+constexpr auto kCaptureBufferSlice = 256 * 1024;
+constexpr auto kCaptureUpdateDelta = crl::time(100);
+
+Instance *CaptureInstance = nullptr;
+
+bool ErrorHappened(ALCdevice *device) {
+	ALenum errCode;
+	if ((errCode = alcGetError(device)) != ALC_NO_ERROR) {
+		LOG(("Audio Capture Error: %1, %2").arg(errCode).arg((const char *)alcGetString(device, errCode)));
+		return true;
+	}
+	return false;
+}
+
+// Downsamples the recorded per-chunk peaks to
+// Player::kWaveformSamplesCount values in the [0, 31] range.
+//
+// Returns an empty waveform when fewer than
+// Player::kWaveformSamplesCount input values are available.
+[[nodiscard]] VoiceWaveform CollectWaveform(
+		const QVector<uchar> &waveformVector) {
+	if (waveformVector.isEmpty()) {
+		return {};
+	}
+	const auto count = int64(waveformVector.size());
+	if (count < Player::kWaveformSamplesCount) {
+		return VoiceWaveform();
+	}
+
+	// Spread the input evenly over the output slots, keeping the
+	// maximum of every slot.
+	auto peaks = QVector<uint16>();
+	peaks.reserve(Player::kWaveformSamplesCount);
+
+	auto peak = uint16(0);
+	auto progress = int64(0);
+	for (const auto byte : waveformVector) {
+		const auto sample = uint16(byte) * 256;
+		if (peak < sample) {
+			peak = sample;
+		}
+		progress += Player::kWaveformSamplesCount;
+		if (progress >= count) {
+			progress -= count;
+			peaks.push_back(peak);
+			peak = 0;
+		}
+	}
+
+	// Normalize against 1.8 * average peak (but not less than 2500).
+	const auto total = std::accumulate(peaks.cbegin(), peaks.cend(), 0LL);
+	peak = qMax(int32(total * 1.8 / peaks.size()), 2500);
+
+	auto waveform = VoiceWaveform();
+	waveform.resize(peaks.size());
+	const auto size = int32(peaks.size());
+	for (auto index = int32(0); index != size; ++index) {
+		const auto clamped = uint32(qMin(peaks.at(index), peak));
+		waveform[index] = char(qMin(31U, clamped * 31 / peak));
+	}
+	return waveform;
+}
+
+} // namespace
+
+// Worker object of the capture Instance. It is moved to the QThread
+// passed to the constructor and its methods are invoked through
+// InvokeQueued() from Instance.
+class Instance::Inner final : public QObject {
+public:
+	Inner(QThread *thread);
+	~Inner();
+
+	void start(
+		Webrtc::DeviceResolvedId id,
+		Fn<void(Update)> updated,
+		Fn<void()> error,
+		Fn<void(Chunk)> externalProcessing);
+	void stop(Fn<void(Result&&)> callback = nullptr);
+	void pause(bool value, Fn<void(Result&&)> callback);
+
+private:
+	// Timer callback: pulls the captured samples and processes them.
+	void process();
+
+	bool initializeFFmpeg();
+	[[nodiscard]] bool processFrame(int32 offset, int32 framesize);
+	// Stops the capture and reports through the stored _error callback.
+	void fail();
+
+	[[nodiscard]] bool writeFrame(AVFrame *frame);
+
+	// Writes the packets till EAGAIN is got from av_receive_packet()
+	// Returns number of packets written or -1 on error
+	[[nodiscard]] int writePackets();
+
+	Fn<void(Chunk)> _externalProcessing;
+	Fn<void(Update)> _updated;
+	Fn<void()> _error;
+
+	struct Private;
+	const std::unique_ptr<Private> d;
+	base::Timer _timer;
+	// Raw captured samples that were not consumed yet.
+	QByteArray _captured;
+
+	bool _paused = false;
+
+};
+
+// Creates the global capture instance (it must not exist yet) and
+// checks whether a capture device is available.
+void Start() {
+	Assert(CaptureInstance == nullptr);
+	CaptureInstance = new Instance();
+	instance()->check();
+}
+
+// Destroys the global capture instance.
+// base::take() presumably resets CaptureInstance before the delete runs.
+void Finish() {
+	delete base::take(CaptureInstance);
+}
+
+// Creates the worker object bound to _thread, registers this object as
+// the global capture instance and starts the worker thread.
+Instance::Instance() : _inner(std::make_unique<Inner>(&_thread)) {
+	CaptureInstance = this;
+	_thread.start();
+}
+
+// Starts capturing from the current capture device.
+//
+// The previous _updates stream is finished first. The real work is
+// queued to the worker object; updates and errors it reports are
+// forwarded to _updates on the main thread, and _started is raised on
+// the main thread once the worker start() call has returned.
+void Instance::start(Fn<void(Chunk)> externalProcessing) {
+	_updates.fire_done();
+	const auto id = Audio::Current().captureDeviceId();
+	InvokeQueued(_inner.get(), [=] {
+		const auto onUpdate = [=](Update update) {
+			crl::on_main(this, [=] {
+				_updates.fire_copy(update);
+			});
+		};
+		const auto onError = [=] {
+			crl::on_main(this, [=] {
+				_updates.fire_error(Error::Other);
+			});
+		};
+		_inner->start(id, onUpdate, onError, externalProcessing);
+		crl::on_main(this, [=] {
+			_started = true;
+		});
+	});
+}
+
+// Stops capturing on the worker object.
+//
+// Without a callback the recording is simply dropped. With a callback
+// the result produced by the worker is delivered to |callback| on the
+// main thread. In both cases _started is reset on the main thread.
+void Instance::stop(Fn<void(Result&&)> callback) {
+	InvokeQueued(_inner.get(), [=] {
+		if (!callback) {
+			_inner->stop();
+			crl::on_main(this, [=] { _started = false; });
+			return;
+		}
+		_inner->stop([=](Result &&result) {
+			// NOTE(review): unlike the branch above this crl::on_main()
+			// call is not guarded by `this`, yet it writes _started -
+			// confirm the Instance always outlives the queued call.
+			crl::on_main([=, result = std::move(result)]() mutable {
+				callback(std::move(result));
+				_started = false;
+			});
+		});
+	});
+}
+
+// Forwards the pause request to the worker object.
+//
+// A non-empty |callback| is wrapped so that it is invoked on the main
+// thread; an empty one is passed through as is.
+void Instance::pause(bool value, Fn<void(Result&&)> callback) {
+	InvokeQueued(_inner.get(), [=] {
+		auto done = callback
+			? [=](Result &&result) {
+				crl::on_main([=, result = std::move(result)]() mutable {
+					callback(std::move(result));
+				});
+			}
+			: std::move(callback);
+		_inner->pause(value, std::move(done));
+	});
+}
+
+// Updates _available: capture is considered available when OpenAL
+// reports a non-empty default capture device name.
+void Instance::check() {
+	const auto device = alcGetString(
+		nullptr,
+		ALC_CAPTURE_DEFAULT_DEVICE_SPECIFIER);
+	const auto found = device && !QString::fromUtf8(device).isEmpty();
+	_available = found;
+	if (!found) {
+		LOG(("Audio Error: No capture device found!"));
+	}
+}
+
+// Hands the worker object over to its own thread for destruction and
+// blocks until that thread has finished.
+Instance::~Instance() {
+	// Send _inner to its thread for destruction.
+	if (const auto context = _inner.get()) {
+		InvokeQueued(context, [copy = base::take(_inner)]{});
+	}
+
+	// And wait for it to finish.
+	_thread.quit();
+	_thread.wait();
+}
+
+// Returns the global capture instance (nullptr when none is registered).
+Instance *instance() {
+	return CaptureInstance;
+}
+
+// All the capture / encoding state of Instance::Inner: the OpenAL
+// capture device, the FFmpeg muxer / encoder / resampler objects, the
+// encoded output (|data| with the |dataPos| cursor) and the waveform
+// accumulator. The static functions are the custom AVIO callbacks
+// passed to avio_alloc_context(); |opaque| is the Private instance.
+struct Instance::Inner::Private {
+	ALCdevice *device = nullptr;
+	AVOutputFormat *fmt = nullptr;
+	uchar *ioBuffer = nullptr;
+	AVIOContext *ioContext = nullptr;
+	AVFormatContext *fmtContext = nullptr;
+	AVStream *stream = nullptr;
+	const AVCodec *codec = nullptr;
+	AVCodecContext *codecContext = nullptr;
+	int channels = 0;
+	bool opened = false;
+	bool processing = false;
+
+	int srcSamples = 0;
+	int dstSamples = 0;
+	int maxDstSamples = 0;
+	int dstSamplesSize = 0;
+	int fullSamples = 0;
+	uint8_t **srcSamplesData = nullptr;
+	uint8_t **dstSamplesData = nullptr;
+	SwrContext *swrContext = nullptr;
+
+	int32 lastUpdate = 0;
+	uint16 levelMax = 0;
+
+	QByteArray data;
+	int32 dataPos = 0;
+
+	// One waveform value is stored per |waveformEach| samples.
+	int64 waveformMod = 0;
+	int64 waveformEach = (kCaptureFrequency / 100);
+	uint16 waveformPeak = 0;
+	QVector<uchar> waveform;
+
+	// Copies up to |buf_size| bytes from |data| starting at |dataPos|
+	// and advances the cursor. Returns the number of bytes copied or
+	// AVERROR_EOF when nothing is left.
+	static int ReadData(void *opaque, uint8_t *buf, int buf_size) {
+		auto l = reinterpret_cast<Private*>(opaque);
+
+		int32 nbytes = qMin(l->data.size() - l->dataPos, int32(buf_size));
+		if (nbytes <= 0) {
+			return AVERROR_EOF;
+		}
+
+		memcpy(buf, l->data.constData() + l->dataPos, nbytes);
+		l->dataPos += nbytes;
+		return nbytes;
+	}
+
+	// Writes |buf_size| bytes into |data| at |dataPos|, growing |data|
+	// when needed, and advances the cursor. The signature depends on
+	// DA_FFMPEG_CONST_WRITE_CALLBACK (const or non-const buffer).
+#if DA_FFMPEG_CONST_WRITE_CALLBACK
+	static int WriteData(void *opaque, const uint8_t *buf, int buf_size) {
+#else
+	static int WriteData(void *opaque, uint8_t *buf, int buf_size) {
+#endif
+		auto l = reinterpret_cast<Private*>(opaque);
+
+		if (buf_size <= 0) return 0;
+		if (l->dataPos + buf_size > l->data.size()) l->data.resize(l->dataPos + buf_size);
+		memcpy(l->data.data() + l->dataPos, buf, buf_size);
+		l->dataPos += buf_size;
+		return buf_size;
+	}
+
+	// Moves the cursor according to |whence| and returns the new
+	// position. Returns -1 for an unknown |whence| or a negative target.
+	// The target is not checked against data.size().
+	static int64_t SeekData(void *opaque, int64_t offset, int whence) {
+		auto l = reinterpret_cast<Private*>(opaque);
+
+		int32 newPos = -1;
+		switch (whence) {
+		case SEEK_SET: newPos = offset; break;
+		case SEEK_CUR: newPos = l->dataPos + offset; break;
+		case SEEK_END: newPos = l->data.size() + offset; break;
+		case AVSEEK_SIZE: {
+			// Special whence for determining filesize without any seek.
+			return l->data.size();
+		} break;
+		}
+		if (newPos < 0) {
+			return -1;
+		}
+		l->dataPos = newPos;
+		return l->dataPos;
+	}
+};
+
+// Creates the private state and a timer bound to |thread| whose
+// callback is process(), then moves this object to |thread|.
+Instance::Inner::Inner(QThread *thread)
+: d(std::make_unique<Private>())
+, _timer(thread, [=] { process(); }) {
+	moveToThread(thread);
+}
+
+// Stops an active capture, discarding its result (no callback is passed).
+Instance::Inner::~Inner() {
+	stop();
+}
+
+// Error path: stops the capture without producing a result and, when
+// an error callback is stored, queues it on this object. The callback
+// is taken out of _error first, so it is reported at most once.
+void Instance::Inner::fail() {
+	stop();
+	if (const auto error = base::take(_error)) {
+		InvokeQueued(this, error);
+	}
+}
+
+// Opens the OpenAL capture device identified by |id| and starts
+// capturing from it.
+//
+// |updated| and |error| are stored and used later by process() and
+// fail(). When |externalProcessing| is set, the raw captured samples
+// are handed over to it and the FFmpeg encoder is not initialized.
+// On any failure fail() is called and the capture timer is not started.
+void Instance::Inner::start(
+		Webrtc::DeviceResolvedId id,
+		Fn<void(Update)> updated,
+		Fn<void()> error,
+		Fn<void(Chunk)> externalProcessing) {
+	_externalProcessing = std::move(externalProcessing);
+	_updated = std::move(updated);
+	_error = std::move(error);
+	_paused = false;
+
+	// Start OpenAL Capture
+	const auto utf = id.isDefault() ? std::string() : id.value.toStdString();
+	d->device = alcCaptureOpenDevice(
+		utf.empty() ? nullptr : utf.c_str(),
+		kCaptureFrequency,
+		AL_FORMAT_MONO16,
+		kCaptureFrequency / 5);
+	if (!d->device) {
+		LOG(("Audio Error: capture device not present!"));
+		fail();
+		return;
+	}
+	alcCaptureStart(d->device);
+	if (ErrorHappened(d->device)) {
+		alcCaptureCloseDevice(d->device);
+		d->device = nullptr;
+		fail();
+		return;
+	} else if (!_externalProcessing && !initializeFFmpeg()) {
+		fail();
+
+		// fail() -> stop() returns right away while the timer is not
+		// running yet, so the device opened above would stay open and
+		// capturing. Release it here if stop() did not do that.
+		// NOTE(review): FFmpeg objects partially created by
+		// initializeFFmpeg() are not released on this path - confirm
+		// and clean them up separately.
+		if (d->device) {
+			alcCaptureStop(d->device);
+			alcCaptureCloseDevice(d->device);
+			d->device = nullptr;
+		}
+		return;
+	}
+	_timer.callEach(50);
+	_captured.clear();
+	_captured.reserve(kCaptureBufferSlice);
+	DEBUG_LOG(("Audio Capture: started!"));
+}
+
+// Prepares the FFmpeg pipeline that encodes the captured samples into
+// an in-memory "opus" container: custom AVIO context over d->data,
+// output format context, encoder, resampler (S16 -> encoder format)
+// and the destination sample buffers; finally writes the file header.
+//
+// Returns false on the first failure (after logging it). Nothing that
+// was already allocated is released here.
+bool Instance::Inner::initializeFFmpeg() {
+	// Create encoding context
+
+	d->ioBuffer = (uchar*)av_malloc(FFmpeg::kAVBlockSize);
+
+	d->ioContext = avio_alloc_context(d->ioBuffer, FFmpeg::kAVBlockSize, 1, static_cast<void*>(d.get()), &Private::ReadData, &Private::WriteData, &Private::SeekData);
+	int res = 0;
+	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+	// Look the muxer up by name among all registered muxers.
+	const AVOutputFormat *fmt = nullptr;
+	void *i = nullptr;
+	while ((fmt = av_muxer_iterate(&i))) {
+		if (fmt->name == u"opus"_q) {
+			break;
+		}
+	}
+	if (!fmt) {
+		LOG(("Audio Error: Unable to find opus AVOutputFormat for capture"));
+		return false;
+	}
+
+	if ((res = avformat_alloc_output_context2(&d->fmtContext, (AVOutputFormat*)fmt, 0, 0)) < 0) {
+		LOG(("Audio Error: Unable to avformat_alloc_output_context2 for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+	d->fmtContext->pb = d->ioContext;
+	d->fmtContext->flags |= AVFMT_FLAG_CUSTOM_IO;
+	d->opened = true;
+
+	// Add audio stream
+	d->codec = avcodec_find_encoder(fmt->audio_codec);
+	if (!d->codec) {
+		LOG(("Audio Error: Unable to avcodec_find_encoder for capture"));
+		return false;
+	}
+	d->stream = avformat_new_stream(d->fmtContext, d->codec);
+	if (!d->stream) {
+		LOG(("Audio Error: Unable to avformat_new_stream for capture"));
+		return false;
+	}
+	d->stream->id = d->fmtContext->nb_streams - 1;
+	d->codecContext = avcodec_alloc_context3(d->codec);
+	if (!d->codecContext) {
+		LOG(("Audio Error: Unable to avcodec_alloc_context3 for capture"));
+		return false;
+	}
+
+	av_opt_set_int(d->codecContext, "refcounted_frames", 1, 0);
+
+	// Mono, planar float, 32 kbit/s at the capture frequency.
+	d->codecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
+	d->codecContext->bit_rate = 32000;
+	d->codecContext->ch_layout = AV_CHANNEL_LAYOUT_MONO;
+	d->channels = d->codecContext->ch_layout.nb_channels;
+	d->codecContext->sample_rate = kCaptureFrequency;
+
+	if (d->fmtContext->oformat->flags & AVFMT_GLOBALHEADER) {
+		d->codecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+	}
+
+	// Open audio stream
+	if ((res = avcodec_open2(d->codecContext, d->codec, nullptr)) < 0) {
+		LOG(("Audio Error: Unable to avcodec_open2 for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+
+	// Alloc source samples
+
+	// Samples per encoded frame: the codec frame size, or 10000 when
+	// the codec accepts frames of variable size.
+	d->srcSamples = (d->codecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) ? 10000 : d->codecContext->frame_size;
+	//if ((res = av_samples_alloc_array_and_samples(&d->srcSamplesData, 0, d->codecContext->channels, d->srcSamples, d->codecContext->sample_fmt, 0)) < 0) {
+	//	LOG(("Audio Error: Unable to av_samples_alloc_array_and_samples for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+	//	onStop(false);
+	//	emit error();
+	//	return;
+	//}
+	// Using _captured directly
+
+	// Prepare resampling
+	res = swr_alloc_set_opts2(
+		&d->swrContext,
+		&d->codecContext->ch_layout,
+		d->codecContext->sample_fmt,
+		d->codecContext->sample_rate,
+		&d->codecContext->ch_layout,
+		AV_SAMPLE_FMT_S16,
+		d->codecContext->sample_rate,
+		0,
+		nullptr);
+	if (res < 0 || !d->swrContext) {
+		LOG(("Audio Error: Unable to swr_alloc_set_opts2 for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	} else if ((res = swr_init(d->swrContext)) < 0) {
+		LOG(("Audio Error: Unable to swr_init for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+	// Destination buffers start with room for one source frame;
+	// processFrame() reallocates them when more is needed.
+	d->maxDstSamples = d->srcSamples;
+	if ((res = av_samples_alloc_array_and_samples(&d->dstSamplesData, 0, d->channels, d->maxDstSamples, d->codecContext->sample_fmt, 0)) < 0) {
+		LOG(("Audio Error: Unable to av_samples_alloc_array_and_samples for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+	d->dstSamplesSize = av_samples_get_buffer_size(0, d->channels, d->maxDstSamples, d->codecContext->sample_fmt, 0);
+	if ((res = avcodec_parameters_from_context(d->stream->codecpar, d->codecContext)) < 0) {
+		LOG(("Audio Error: Unable to avcodec_parameters_from_context for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+	// Write file header
+	if ((res = avformat_write_header(d->fmtContext, 0)) < 0) {
+		LOG(("Audio Error: Unable to avformat_write_header for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		return false;
+	}
+	return true;
+}
+
+// Sets the paused flag. When pausing (and a callback is given) the
+// callback receives what has been encoded so far: the bytes, the
+// waveform and the duration in milliseconds, all empty / zero when no
+// samples were encoded yet. Resuming never invokes the callback.
+void Instance::Inner::pause(bool value, Fn<void(Result&&)> callback) {
+	_paused = value;
+	if (!value || !callback) {
+		return;
+	}
+	const auto hasSamples = (d->fullSamples != 0);
+	callback({
+		.bytes = hasSamples ? d->data : QByteArray(),
+		.waveform = (hasSamples
+			? CollectWaveform(d->waveform)
+			: VoiceWaveform()),
+		.duration = ((d->fullSamples * crl::time(1000))
+			/ int64(kCaptureFrequency)),
+	});
+}
+
+// Stops the capture, closes the device and releases the encoder state.
+//
+// When |callback| is set the remaining captured samples are faded out,
+// padded to a whole frame, encoded and the final result (bytes,
+// waveform, duration in ms) is passed to |callback|. Recordings that
+// are too short or fail to encode produce an empty result.
+void Instance::Inner::stop(Fn<void(Result&&)> callback) {
+	// NOTE(review): this also returns early when start() failed before
+	// arming the timer; state allocated by such a start() does not
+	// appear to be released by this call - confirm.
+	if (!_timer.isActive()) {
+		return; // in stop() already
+	}
+	_paused = false;
+	_timer.cancel();
+
+	const auto needResult = (callback != nullptr);
+	const auto hadDevice = (d->device != nullptr);
+	if (hadDevice) {
+		alcCaptureStop(d->device);
+		if (d->processing) {
+			Assert(!needResult); // stop in the middle of processing - error.
+		} else {
+			process(); // get last data
+		}
+		alcCaptureCloseDevice(d->device);
+		d->device = nullptr;
+	}
+
+	// Write what is left
+	if (needResult && !_captured.isEmpty()) {
+		auto fadeSamples = kCaptureFadeInDuration * kCaptureFrequency / 1000;
+		auto capturedSamples = static_cast<int>(_captured.size() / sizeof(short));
+		// Drop everything when the tail is not sample-aligned, the whole
+		// recording is shorter than one second, or the tail is shorter
+		// than the fade duration.
+		if ((_captured.size() % sizeof(short)) || (d->fullSamples + capturedSamples < kCaptureFrequency) || (capturedSamples < fadeSamples)) {
+			d->fullSamples = 0;
+			d->dataPos = 0;
+			d->data.clear();
+			d->waveformMod = 0;
+			d->waveformPeak = 0;
+			d->waveform.clear();
+		} else {
+			// Fade out: walk backwards from the last sample, scaling
+			// from 0 at the very end up towards 1.
+			float64 coef = 1. / fadeSamples, fadedFrom = 0;
+			for (short *ptr = ((short*)_captured.data()) + capturedSamples, *end = ptr - fadeSamples; ptr != end; ++fadedFrom) {
+				--ptr;
+				*ptr = qRound(fadedFrom * coef * *ptr);
+			}
+			// Pad with silence up to a whole number of encoder frames.
+			if (capturedSamples % d->srcSamples) {
+				int32 s = _captured.size();
+				_captured.resize(s + (d->srcSamples - (capturedSamples % d->srcSamples)) * sizeof(short));
+				memset(_captured.data() + s, 0, _captured.size() - s);
+			}
+
+			int32 framesize = d->srcSamples * d->channels * sizeof(short), encoded = 0;
+			while (_captured.size() >= encoded + framesize) {
+				if (!processFrame(encoded, framesize)) {
+					break;
+				}
+				encoded += framesize;
+			}
+			// Drain the codec.
+			if (!writeFrame(nullptr) || encoded != _captured.size()) {
+				d->fullSamples = 0;
+				d->dataPos = 0;
+				d->data.clear();
+				d->waveformMod = 0;
+				d->waveformPeak = 0;
+				d->waveform.clear();
+			}
+		}
+	}
+	DEBUG_LOG(("Audio Capture: "
+		"stopping (need result: %1), size: %2, samples: %3"
+		).arg(Logs::b(callback != nullptr)
+		).arg(d->data.size()
+		).arg(d->fullSamples));
+	_captured = QByteArray();
+
+	// Finish stream
+	if (needResult && hadDevice && d->fmtContext) {
+		av_write_trailer(d->fmtContext);
+	}
+
+	// Snapshot the result before the state below is reset.
+	QByteArray result = d->fullSamples ? d->data : QByteArray();
+	VoiceWaveform waveform;
+	qint32 samples = d->fullSamples;
+	if (needResult && samples && !d->waveform.isEmpty()) {
+		waveform = CollectWaveform(d->waveform);
+	}
+	if (hadDevice) {
+		if (d->codecContext) {
+			avcodec_free_context(&d->codecContext);
+			d->codecContext = nullptr;
+		}
+		if (d->srcSamplesData) {
+			if (d->srcSamplesData[0]) {
+				av_freep(&d->srcSamplesData[0]);
+			}
+			av_freep(&d->srcSamplesData);
+		}
+		if (d->dstSamplesData) {
+			if (d->dstSamplesData[0]) {
+				av_freep(&d->dstSamplesData[0]);
+			}
+			av_freep(&d->dstSamplesData);
+		}
+		d->fullSamples = 0;
+		if (d->swrContext) {
+			swr_free(&d->swrContext);
+			d->swrContext = nullptr;
+		}
+		// NOTE(review): fmtContext is created by
+		// avformat_alloc_output_context2() in initializeFFmpeg() but is
+		// closed with avformat_close_input() here - confirm intended.
+		if (d->opened) {
+			avformat_close_input(&d->fmtContext);
+			d->opened = false;
+		}
+		if (d->ioContext) {
+			av_freep(&d->ioContext->buffer);
+			av_freep(&d->ioContext);
+			d->ioBuffer = nullptr;
+		} else if (d->ioBuffer) {
+			av_freep(&d->ioBuffer);
+		}
+		if (d->fmtContext) {
+			avformat_free_context(d->fmtContext);
+			d->fmtContext = nullptr;
+		}
+		d->fmt = nullptr;
+		d->stream = nullptr;
+		d->codec = nullptr;
+
+		d->lastUpdate = 0;
+		d->levelMax = 0;
+
+		d->dataPos = 0;
+		d->data.clear();
+
+		d->waveformMod = 0;
+		d->waveformPeak = 0;
+		d->waveform.clear();
+	}
+
+	if (needResult) {
+		callback({
+			.bytes = result,
+			.waveform = waveform,
+			.duration = (samples * crl::time(1000)) / kCaptureFrequency,
+		});
+	}
+}
+
+// Timer callback (also called once from stop() to fetch the last data).
+//
+// Pulls the available samples from the capture device into _captured.
+// With external processing they are handed over as one Chunk and
+// nothing else is done. Otherwise the recording level is updated,
+// _updated is notified from time to time and all complete frames are
+// encoded through processFrame().
+void Instance::Inner::process() {
+	Expects(!d->processing);
+
+	if (_paused) {
+		return;
+	}
+
+	// The flag lets stop() detect that it was called from inside
+	// process(); it is cleared on every exit path by the guard.
+	d->processing = true;
+	const auto guard = gsl::finally([&] { d->processing = false; });
+
+	if (!d->device) {
+		_timer.cancel();
+		return;
+	}
+	ALint samples;
+	alcGetIntegerv(d->device, ALC_CAPTURE_SAMPLES, 1, &samples);
+	if (ErrorHappened(d->device)) {
+		fail();
+		return;
+	}
+	if (samples > 0) {
+		// Get samples from OpenAL
+		auto s = _captured.size();
+		auto news = s + static_cast<int>(samples * sizeof(short));
+		// Grow the reservation in kCaptureBufferSlice steps.
+		if (news / kCaptureBufferSlice > s / kCaptureBufferSlice) {
+			_captured.reserve(((news / kCaptureBufferSlice) + 1) * kCaptureBufferSlice);
+		}
+		_captured.resize(news);
+		alcCaptureSamples(d->device, (ALCvoid *)(_captured.data() + s), samples);
+		if (ErrorHappened(d->device)) {
+			fail();
+			return;
+		} else if (_externalProcessing) {
+			_externalProcessing({
+				.finished = crl::now(),
+				.samples = base::take(_captured),
+				.frequency = kCaptureFrequency,
+			});
+			return;
+		}
+
+		// Count new recording level and update view
+		auto skipSamples = kCaptureSkipDuration * kCaptureFrequency / 1000;
+		auto fadeSamples = kCaptureFadeInDuration * kCaptureFrequency / 1000;
+		auto levelindex = d->fullSamples + static_cast<int>(s / sizeof(short));
+		// Samples inside the initial skip window are ignored, the ones
+		// inside the fade-in window are scaled down proportionally.
+		for (auto ptr = (const short*)(_captured.constData() + s), end = (const short*)(_captured.constData() + news); ptr < end; ++ptr, ++levelindex) {
+			if (levelindex > skipSamples) {
+				uint16 value = qAbs(*ptr);
+				if (levelindex < skipSamples + fadeSamples) {
+					value = qRound(value * float64(levelindex - skipSamples) / fadeSamples);
+				}
+				if (d->levelMax < value) {
+					d->levelMax = value;
+				}
+			}
+		}
+		qint32 samplesFull = d->fullSamples + _captured.size() / sizeof(short), samplesSinceUpdate = samplesFull - d->lastUpdate;
+		if (samplesSinceUpdate > kCaptureUpdateDelta * kCaptureFrequency / 1000) {
+			_updated(Update{ .samples = samplesFull, .level = d->levelMax });
+			d->lastUpdate = samplesFull;
+			d->levelMax = 0;
+		}
+		// Write frames
+		// The last |fadeSamples| samples always stay in the buffer;
+		// stop() fades them out before encoding them.
+		int32 framesize = d->srcSamples * d->channels * sizeof(short), encoded = 0;
+		while (uint32(_captured.size()) >= encoded + framesize + fadeSamples * sizeof(short)) {
+			if (!processFrame(encoded, framesize)) {
+				return;
+			}
+			encoded += framesize;
+		}
+
+		// Collapse the buffer
+		if (encoded > 0) {
+			int32 goodSize = _captured.size() - encoded;
+			memmove(_captured.data(), _captured.constData() + encoded, goodSize);
+			_captured.resize(goodSize);
+		}
+	} else {
+		DEBUG_LOG(("Audio Capture: no samples to capture."));
+	}
+}
+
+// Processes one frame of raw captured samples stored in _captured:
+// applies the initial skip / fade-in, updates the waveform, converts
+// the samples with swresample and hands the result to writeFrame().
+//
+// offset - byte offset of the frame inside _captured.
+// framesize - size of the frame in bytes (must be a multiple of
+// sizeof(short)).
+//
+// Returns false when processing failed; fail() has been called either
+// here or inside writeFrame() in that case.
+bool Instance::Inner::processFrame(int32 offset, int32 framesize) {
+	// Prepare audio frame
+
+	if (framesize % sizeof(short)) { // in the middle of a sample
+		LOG(("Audio Error: Bad framesize in writeFrame() for capture, framesize %1").arg(framesize));
+		fail();
+		return false;
+	}
+	auto samplesCnt = static_cast<int>(framesize / sizeof(short));
+
+	int res = 0;
+	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+
+	auto srcSamplesDataChannel = (short*)(_captured.data() + offset);
+	auto srcSamplesData = &srcSamplesDataChannel;
+
+	// Zero the first skipSamples samples of the recording and then
+	// linearly fade in the following fadeSamples samples.
+	auto skipSamples = static_cast<int>(kCaptureSkipDuration * kCaptureFrequency / 1000);
+	auto fadeSamples = static_cast<int>(kCaptureFadeInDuration * kCaptureFrequency / 1000);
+	if (d->fullSamples < skipSamples + fadeSamples) {
+		int32 fadedCnt = qMin(samplesCnt, skipSamples + fadeSamples - d->fullSamples);
+		float64 coef = 1. / fadeSamples, fadedFrom = d->fullSamples - skipSamples;
+		short *ptr = srcSamplesDataChannel, *zeroEnd = ptr + qMin(samplesCnt, qMax(0, skipSamples - d->fullSamples)), *end = ptr + fadedCnt;
+		for (; ptr != zeroEnd; ++ptr, ++fadedFrom) {
+			*ptr = 0;
+		}
+		for (; ptr != end; ++ptr, ++fadedFrom) {
+			*ptr = qRound(fadedFrom * coef * *ptr);
+		}
+	}
+
+	// Collect one waveform byte (peak / 256) per waveformEach samples.
+	d->waveform.reserve(d->waveform.size() + (samplesCnt / d->waveformEach) + 1);
+	for (short *ptr = srcSamplesDataChannel, *end = ptr + samplesCnt; ptr != end; ++ptr) {
+		uint16 value = qAbs(*ptr);
+		if (d->waveformPeak < value) {
+			d->waveformPeak = value;
+		}
+		if (++d->waveformMod == d->waveformEach) {
+			d->waveformMod -= d->waveformEach;
+			d->waveform.push_back(uchar(d->waveformPeak / 256));
+			d->waveformPeak = 0;
+		}
+	}
+
+	// Convert to final format
+
+	d->dstSamples = av_rescale_rnd(swr_get_delay(d->swrContext, d->codecContext->sample_rate) + d->srcSamples, d->codecContext->sample_rate, d->codecContext->sample_rate, AV_ROUND_UP);
+	if (d->dstSamples > d->maxDstSamples) {
+		// Grow the destination buffer, it is reused between frames.
+		d->maxDstSamples = d->dstSamples;
+		av_freep(&d->dstSamplesData[0]);
+		if ((res = av_samples_alloc(d->dstSamplesData, 0, d->channels, d->dstSamples, d->codecContext->sample_fmt, 1)) < 0) {
+			LOG(("Audio Error: Unable to av_samples_alloc for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+			fail();
+			return false;
+		}
+		d->dstSamplesSize = av_samples_get_buffer_size(0, d->channels, d->maxDstSamples, d->codecContext->sample_fmt, 0);
+	}
+
+	if ((res = swr_convert(d->swrContext, d->dstSamplesData, d->dstSamples, (const uint8_t **)srcSamplesData, d->srcSamples)) < 0) {
+		LOG(("Audio Error: Unable to swr_convert for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		fail();
+		return false;
+	}
+
+	// Write audio frame
+
+	AVFrame *frame = av_frame_alloc();
+	if (!frame) {
+		LOG(("Audio Error: Unable to av_frame_alloc for capture"));
+		fail();
+		return false;
+	}
+	// Free the frame on every exit path, including writeFrame() failure.
+	const auto guard = gsl::finally([&] { av_frame_free(&frame); });
+
+	frame->format = d->codecContext->sample_fmt;
+	av_channel_layout_copy(&frame->ch_layout, &d->codecContext->ch_layout);
+	frame->sample_rate = d->codecContext->sample_rate;
+	frame->nb_samples = d->dstSamples;
+	frame->pts = av_rescale_q(d->fullSamples, AVRational { 1, d->codecContext->sample_rate }, d->codecContext->time_base);
+
+	avcodec_fill_audio_frame(frame, d->channels, d->codecContext->sample_fmt, d->dstSamplesData[0], d->dstSamplesSize, 0);
+
+	if (!writeFrame(frame)) {
+		return false;
+	}
+
+	d->fullSamples += samplesCnt;
+	return true;
+}
+
+// Sends a frame to the encoder and writes out produced packets when
+// the encoder asks for that. A null frame starts draining the encoder,
+// in that case all remaining packets are written until EOF.
+//
+// Returns false on failure; fail() has been called in that case
+// (either here or inside writePackets()).
+bool Instance::Inner::writeFrame(AVFrame *frame) {
+	int res = 0;
+	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+
+	res = avcodec_send_frame(d->codecContext, frame);
+	if (res == AVERROR(EAGAIN)) {
+		// The encoder output must be read before it accepts more input.
+		const auto packetsWritten = writePackets();
+		if (packetsWritten < 0) {
+			if (packetsWritten != AVERROR_EOF) {
+				// writePackets() already logged the error and called
+				// fail(), do not report success to the caller.
+				return false;
+			} else if (frame) {
+				LOG(("Audio Error: EOF in packets received when EAGAIN was got in avcodec_send_frame()"));
+				fail();
+				return false;
+			}
+			// EOF while draining: nothing more to write.
+			return true;
+		} else if (!packetsWritten) {
+			LOG(("Audio Error: No packets received when EAGAIN was got in avcodec_send_frame()"));
+			fail();
+			return false;
+		}
+		res = avcodec_send_frame(d->codecContext, frame);
+	}
+	if (res < 0) {
+		LOG(("Audio Error: Unable to avcodec_send_frame for capture, error %1, %2").arg(res).arg(av_make_error_string(err, sizeof(err), res)));
+		fail();
+		return false;
+	}
+
+	if (!frame) { // drain
+		if ((res = writePackets()) != AVERROR_EOF) {
+			LOG(("Audio Error: not EOF in packets received when draining the codec, result %1").arg(res));
+			fail();
+			return false;
+		}
+	}
+	return true;
+}
+
+// Reads all packets currently available from the encoder and writes
+// them to the output format context.
+//
+// Returns the count of written packets when the encoder reports
+// EAGAIN, AVERROR_EOF when the encoder is fully drained, or a negative
+// value after fail() was called on any other error.
+int Instance::Inner::writePackets() {
+	AVPacket *packet = av_packet_alloc();
+	const auto freePacket = gsl::finally([&] { av_packet_free(&packet); });
+
+	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
+	auto count = 0;
+	for (;;) {
+		const auto received = avcodec_receive_packet(d->codecContext, packet);
+		if (received == AVERROR(EAGAIN)) {
+			return count;
+		} else if (received == AVERROR_EOF) {
+			return received;
+		} else if (received < 0) {
+			LOG(("Audio Error: Unable to avcodec_receive_packet for capture, error %1, %2").arg(received).arg(av_make_error_string(err, sizeof(err), received)));
+			fail();
+			return received;
+		}
+
+		// Packet timestamps are in codec time base, the muxer needs
+		// them in the stream time base.
+		av_packet_rescale_ts(packet, d->codecContext->time_base, d->stream->time_base);
+		packet->stream_index = d->stream->index;
+
+		const auto wrote = av_interleaved_write_frame(d->fmtContext, packet);
+		if (wrote < 0) {
+			LOG(("Audio Error: Unable to av_interleaved_write_frame for capture, error %1, %2").arg(wrote).arg(av_make_error_string(err, sizeof(err), wrote)));
+			fail();
+			return -1;
+		}
+
+		++count;
+		av_packet_unref(packet);
+	}
+}
+
+} // namespace Capture
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_capture.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_capture.h
@@ -0,0 +1,84 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include <QtCore/QThread>
+#include <QtCore/QTimer>
+
+namespace Media {
+namespace Capture {
+
+// Value type of the Instance::updated() producer.
+struct Update {
+	// Sample count reported with this update.
+	int samples = 0;
+	// Level value reported with this update.
+	ushort level = 0;
+
+	// NOTE(review): presumably set on the last update of a recording,
+	// confirm against the code that fires the updates.
+	bool finished = false;
+};
+
+// Error type of the Instance::updated() producer.
+// NOTE(review): the exact conditions producing each value are not
+// visible in this header, check the capture implementation.
+enum class Error : uchar {
+	Other,
+	AudioInit,
+	VideoInit,
+	AudioTimeout,
+	VideoTimeout,
+	Encoding,
+};
+
+// Argument of the externalProcessing callback given to Instance::start().
+// NOTE(review): presumably a piece of raw captured audio together with
+// its sample rate and the time it was finished at - confirm with the
+// code that fills it.
+struct Chunk {
+	crl::time finished = 0;
+	QByteArray samples;
+	int frequency = 0;
+};
+
+struct Result;
+
+void Start();
+void Finish();
+
+// Capture facade: exposes availability, the started state and a stream
+// of updates, and forwards start / stop / pause requests.
+// NOTE(review): the actual work is presumably done by Inner on _thread,
+// confirm in the implementation file.
+class Instance final : public QObject {
+public:
+	Instance();
+	~Instance();
+
+	void check();
+	// Returns the current value of the availability flag.
+	[[nodiscard]] bool available() const {
+		return _available;
+	}
+
+	// Stream of capture updates, Error is the error type of the stream.
+	[[nodiscard]] rpl::producer<Update, Error> updated() const {
+		return _updates.events();
+	}
+
+	// Current value of the started state.
+	[[nodiscard]] bool started() const {
+		return _started.current();
+	}
+	// Changes of the started state.
+	[[nodiscard]] rpl::producer<bool> startedChanges() const {
+		return _started.changes();
+	}
+
+	// externalProcessing is optional and receives Chunk values.
+	void start(Fn<void(Chunk)> externalProcessing = nullptr);
+	// callback is optional and receives the capture Result.
+	void stop(Fn<void(Result&&)> callback = nullptr);
+	void pause(bool value, Fn<void(Result&&)> callback = nullptr);
+
+private:
+	class Inner;
+	friend class Inner;
+
+	bool _available = false;
+	rpl::variable<bool> _started = false;
+	rpl::event_stream<Update, Error> _updates;
+	QThread _thread;
+	std::unique_ptr<Inner> _inner;
+
+};
+
+[[nodiscard]] Instance *instance();
+
+} // namespace Capture
+} // namespace Media
+
--- a/Telegram/SourceFiles/media/audio/media_audio_capture_common.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_capture_common.h
@@ -0,0 +1,19 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+namespace Media::Capture {
+
+// Result of a finished capture, passed to the stop / pause callbacks.
+struct Result {
+	// Encoded recording bytes.
+	QByteArray bytes;
+	// Waveform of the recording.
+	VoiceWaveform waveform;
+	// Duration of the recording. Zero-initialized so that a default
+	// constructed Result never carries an indeterminate value.
+	crl::time duration = 0;
+	// NOTE(review): presumably true for video recordings - confirm.
+	bool video = false;
+};
+
+} // namespace Media::Capture
--- a/Telegram/SourceFiles/media/audio/media_audio_ffmpeg_loader.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_ffmpeg_loader.cpp
@@ -0,0 +1,772 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_ffmpeg_loader.h"
+
+#include "base/bytes.h"
+#include "core/file_location.h"
+#include "ffmpeg/ffmpeg_utility.h"
+#include "media/media_common.h"
+
+extern "C" {
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+} // extern "C"
+
+namespace Media {
+namespace {
+
+using FFmpeg::AvErrorWrap;
+using FFmpeg::LogError;
+
+} // namespace
+
+// Multiplies an integer value by a rational number using integer
+// arithmetic (the multiplication is done before the division).
+int64 AbstractFFMpegLoader::Mul(int64 value, AVRational rational) {
+	const auto scaled = value * rational.num;
+	return scaled / rational.den;
+}
+
+// Opens the source (byte array, bytes vector or file) through a custom
+// AVIOContext, finds the best audio stream and fills the sample rate,
+// the duration and the starting sample for the given position.
+//
+// positionMs - position in milliseconds used to compute
+// _startedAtSample.
+// speed - not used by this base implementation.
+//
+// Returns false if any step failed, allocated resources are released
+// by the destructor.
+bool AbstractFFMpegLoader::open(crl::time positionMs, float64 speed) {
+	if (!AudioPlayerLoader::openFile()) {
+		return false;
+	}
+
+	ioBuffer = (uchar *)av_malloc(FFmpeg::kAVBlockSize);
+	if (!ioBuffer) {
+		LogError(u"av_malloc"_q);
+		return false;
+	}
+	// Choose the read / seek callbacks matching the source of the data.
+	if (!_data.isEmpty()) {
+		ioContext = avio_alloc_context(ioBuffer, FFmpeg::kAVBlockSize, 0, reinterpret_cast<void *>(this), &AbstractFFMpegLoader::ReadData, 0, &AbstractFFMpegLoader::SeekData);
+	} else if (!_bytes.empty()) {
+		ioContext = avio_alloc_context(ioBuffer, FFmpeg::kAVBlockSize, 0, reinterpret_cast<void *>(this), &AbstractFFMpegLoader::ReadBytes, 0, &AbstractFFMpegLoader::SeekBytes);
+	} else {
+		ioContext = avio_alloc_context(ioBuffer, FFmpeg::kAVBlockSize, 0, reinterpret_cast<void *>(this), &AbstractFFMpegLoader::ReadFile, 0, &AbstractFFMpegLoader::SeekFile);
+	}
+	if (!ioContext) {
+		// ioBuffer stays set, so the destructor frees it.
+		LogError(u"avio_alloc_context"_q);
+		return false;
+	}
+	fmtContext = avformat_alloc_context();
+	if (!fmtContext) {
+		LogError(u"avformat_alloc_context"_q);
+		return false;
+	}
+	fmtContext->pb = ioContext;
+
+	if (AvErrorWrap error = avformat_open_input(&fmtContext, 0, 0, 0)) {
+		// The buffer is released through ioContext->buffer in the
+		// destructor, forget the separate pointer to it.
+		ioBuffer = nullptr;
+		LogError(u"avformat_open_input"_q, error);
+		return false;
+	}
+	_opened = true;
+
+	if (AvErrorWrap error = avformat_find_stream_info(fmtContext, 0)) {
+		LogError(u"avformat_find_stream_info"_q, error);
+		return false;
+	}
+
+	streamId = av_find_best_stream(fmtContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
+	if (streamId < 0) {
+		LogError(u"av_find_best_stream"_q, AvErrorWrap(streamId));
+		return false;
+	}
+
+	const auto stream = fmtContext->streams[streamId];
+	const auto params = stream->codecpar;
+	_samplesFrequency = params->sample_rate;
+	// Prefer the stream duration, fall back to the container duration.
+	if (stream->duration != AV_NOPTS_VALUE) {
+		_duration = Mul(stream->duration * 1000, stream->time_base);
+	} else {
+		_duration = Mul(fmtContext->duration * 1000, { 1, AV_TIME_BASE });
+	}
+	_startedAtSample = (positionMs * _samplesFrequency) / 1000LL;
+
+	return true;
+}
+
+// Releases the demuxer and the custom IO resources.
+AbstractFFMpegLoader::~AbstractFFMpegLoader() {
+	if (_opened) {
+		avformat_close_input(&fmtContext);
+	}
+	if (!ioContext) {
+		// The IO context was never created, only the raw buffer may exist.
+		if (ioBuffer) {
+			av_freep(&ioBuffer);
+		}
+	} else {
+		// The buffer currently used by the context is freed through it.
+		av_freep(&ioContext->buffer);
+		av_freep(&ioContext);
+	}
+	if (fmtContext) {
+		avformat_free_context(fmtContext);
+	}
+}
+
+// AVIO read callback for the _data byte array source.
+// Copies up to buf_size bytes starting at _dataPos and advances it.
+// Returns the count of copied bytes or AVERROR_EOF when nothing is left.
+int AbstractFFMpegLoader::ReadData(void *opaque, uint8_t *buf, int buf_size) {
+	const auto loader = static_cast<AbstractFFMpegLoader*>(opaque);
+
+	const auto available = qMin(loader->_data.size() - loader->_dataPos, int32(buf_size));
+	if (available <= 0) {
+		return AVERROR_EOF;
+	}
+
+	const auto from = loader->_data.constData() + loader->_dataPos;
+	memcpy(buf, from, available);
+	loader->_dataPos += available;
+	return available;
+}
+
+// AVIO seek callback for the _data byte array source.
+//
+// Supports SEEK_SET, SEEK_CUR, SEEK_END and AVSEEK_SIZE (returns the
+// size of the data without seeking). Returns the new position or -1
+// when the target lies outside of [0, size] or whence is unknown.
+int64_t AbstractFFMpegLoader::SeekData(void *opaque, int64_t offset, int whence) {
+	auto l = reinterpret_cast<AbstractFFMpegLoader *>(opaque);
+
+	// The target is computed in 64 bits, so a large offset cannot wrap
+	// around into the valid range before it is validated.
+	const auto size = int64_t(l->_data.size());
+	auto newPos = int64_t(-1);
+	switch (whence) {
+	case SEEK_SET: newPos = offset; break;
+	case SEEK_CUR: newPos = int64_t(l->_dataPos) + offset; break;
+	case SEEK_END: newPos = size + offset; break;
+	case AVSEEK_SIZE: {
+		// Special whence for determining filesize without any seek.
+		return size;
+	} break;
+	}
+	if (newPos < 0 || newPos > size) {
+		return -1;
+	}
+	l->_dataPos = static_cast<decltype(l->_dataPos)>(newPos);
+	return l->_dataPos;
+}
+
+// AVIO read callback for the _bytes vector source.
+// Copies up to buf_size bytes starting at _dataPos and advances it.
+// Returns the count of copied bytes or AVERROR_EOF when nothing is left.
+int AbstractFFMpegLoader::ReadBytes(void *opaque, uint8_t *buf, int buf_size) {
+	const auto loader = static_cast<AbstractFFMpegLoader*>(opaque);
+
+	const auto available = qMin(static_cast<int>(loader->_bytes.size()) - loader->_dataPos, buf_size);
+	if (available <= 0) {
+		return AVERROR_EOF;
+	}
+
+	const auto from = loader->_bytes.data() + loader->_dataPos;
+	memcpy(buf, from, available);
+	loader->_dataPos += available;
+	return available;
+}
+
+// AVIO seek callback for the _bytes vector source.
+//
+// Supports SEEK_SET, SEEK_CUR, SEEK_END and AVSEEK_SIZE (returns the
+// size of the data without seeking). Returns the new position or -1
+// when the target lies outside of [0, size] or whence is unknown.
+int64_t AbstractFFMpegLoader::SeekBytes(void *opaque, int64_t offset, int whence) {
+	auto l = reinterpret_cast<AbstractFFMpegLoader *>(opaque);
+
+	// The target is computed in 64 bits, so a large offset cannot wrap
+	// around into the valid range before it is validated, and the size
+	// is compared as a signed value.
+	const auto size = static_cast<int64_t>(l->_bytes.size());
+	auto newPos = int64_t(-1);
+	switch (whence) {
+	case SEEK_SET: newPos = offset; break;
+	case SEEK_CUR: newPos = int64_t(l->_dataPos) + offset; break;
+	case SEEK_END: newPos = size + offset; break;
+	case AVSEEK_SIZE:
+	{
+		// Special whence for determining filesize without any seek.
+		return size;
+	} break;
+	}
+	if (newPos < 0 || newPos > size) {
+		return -1;
+	}
+	l->_dataPos = static_cast<decltype(l->_dataPos)>(newPos);
+	return l->_dataPos;
+}
+
+// AVIO read callback for the file source.
+// Maps a read result of -1 to AVERROR_EXTERNAL and 0 to AVERROR_EOF,
+// any other result is returned as is.
+int AbstractFFMpegLoader::ReadFile(void *opaque, uint8_t *buf, int buf_size) {
+	const auto loader = static_cast<AbstractFFMpegLoader*>(opaque);
+	const int count = loader->_f.read(reinterpret_cast<char*>(buf), buf_size);
+	if (count == -1) {
+		return AVERROR_EXTERNAL;
+	} else if (count == 0) {
+		return AVERROR_EOF;
+	}
+	return count;
+}
+
+// AVIO seek callback for the file source.
+// Returns the new file position, the file size for AVSEEK_SIZE, or -1
+// if the seek failed or whence is unknown.
+int64_t AbstractFFMpegLoader::SeekFile(void *opaque, int64_t offset, int whence) {
+	const auto loader = static_cast<AbstractFFMpegLoader*>(opaque);
+	auto &file = loader->_f;
+
+	auto target = int64_t(0);
+	switch (whence) {
+	case AVSEEK_SIZE:
+		// Special whence for determining filesize without any seek.
+		return file.size();
+	case SEEK_SET: target = offset; break;
+	case SEEK_CUR: target = file.pos() + offset; break;
+	case SEEK_END: target = file.size() + offset; break;
+	default: return -1;
+	}
+	return file.seek(target) ? file.pos() : -1;
+}
+
+// Forwards the source description to the base loader and allocates the
+// frame used for decoding.
+AbstractAudioFFMpegLoader::AbstractAudioFFMpegLoader(
+	const Core::FileLocation &file,
+	const QByteArray &data,
+	bytes::vector &&buffer)
+: AbstractFFMpegLoader(file, data, std::move(buffer))
+, _frame(FFmpeg::MakeFramePointer()) {
+}
+
+// Removes queued frames that precede the last frame whose position is
+// not greater than the given sample position. That last frame itself
+// is kept in the queue, and _framesQueuedIndex is shifted accordingly.
+void AbstractAudioFFMpegLoader::dropFramesTill(int64 samples) {
+	const auto first = begin(_framesQueued);
+	const auto firstAfter = ranges::find_if(
+		_framesQueued,
+		[&](const EnqueuedFrame &frame) {
+			return frame.position > samples;
+		});
+	if (firstAfter == first) {
+		return;
+	}
+	const auto keepFrom = firstAfter - 1;
+	const auto count = keepFrom - first;
+	if (count <= 0) {
+		return;
+	}
+	if (_framesQueuedIndex >= 0) {
+		Assert(_framesQueuedIndex >= count);
+		_framesQueuedIndex -= count;
+	}
+	_framesQueued.erase(first, keepFrom);
+	if (_framesQueued.empty()) {
+		_framesQueuedIndex = -1;
+	}
+}
+
+// Applies the new speed and rewinds reading to the first queued frame.
+// Returns the position of that frame or -1 if the queue is empty.
+int64 AbstractAudioFFMpegLoader::startReadingQueuedFrames(float64 newSpeed) {
+	changeSpeedFilter(newSpeed);
+	const auto hasFrames = !_framesQueued.empty();
+	_framesQueuedIndex = hasFrames ? 0 : -1;
+	if (!hasFrames) {
+		return -1;
+	}
+	return _framesQueued.front().position;
+}
+
+// Remembers the decoder sample format and, for the mono / stereo
+// 8 bit and 16 bit formats listed below, uses the decoder format as the
+// output format without conversion. For any other input the output
+// format members are left as they were. Then creates the speed filter.
+// Always returns true.
+bool AbstractAudioFFMpegLoader::initUsingContext(
+		not_null<AVCodecContext*> context,
+		float64 speed) {
+	_swrSrcSampleFormat = context->sample_fmt;
+
+	// Output exactly what the decoder produces.
+	const auto passThrough = [&](int channels, int sampleSize, int format) {
+		_swrDstSampleFormat = _swrSrcSampleFormat;
+		av_channel_layout_copy(&_swrDstChannelLayout, &context->ch_layout);
+		_outputChannels = channels;
+		_outputSampleSize = sampleSize;
+		_outputFormat = format;
+	};
+
+	const AVChannelLayout mono = AV_CHANNEL_LAYOUT_MONO;
+	const AVChannelLayout stereo = AV_CHANNEL_LAYOUT_STEREO;
+	const auto format = _swrSrcSampleFormat;
+	if (!av_channel_layout_compare(&context->ch_layout, &mono)) {
+		// With a single channel planar and packed layouts are the same.
+		if (format == AV_SAMPLE_FMT_U8 || format == AV_SAMPLE_FMT_U8P) {
+			passThrough(1, 1, AL_FORMAT_MONO8);
+		} else if (format == AV_SAMPLE_FMT_S16
+			|| format == AV_SAMPLE_FMT_S16P) {
+			passThrough(1, sizeof(uint16), AL_FORMAT_MONO16);
+		}
+	} else if (!av_channel_layout_compare(&context->ch_layout, &stereo)) {
+		if (format == AV_SAMPLE_FMT_U8) {
+			passThrough(2, 2, AL_FORMAT_STEREO8);
+		} else if (format == AV_SAMPLE_FMT_S16) {
+			passThrough(2, 2 * sizeof(uint16), AL_FORMAT_STEREO16);
+		}
+	}
+
+	createSpeedFilter(speed);
+
+	return true;
+}
+
+// Takes ownership of the given frame as the current decoded frame and
+// reads from it right away.
+auto AbstractAudioFFMpegLoader::replaceFrameAndRead(
+	FFmpeg::FramePointer frame)
+-> ReadResult {
+	_frame = std::move(frame);
+	auto result = readFromReadyFrame();
+	return result;
+}
+
+// Produces the next read result from, in this order:
+// 1. the sink of the filter graph (if a graph exists),
+// 2. the queue of already decoded frames (when it is being replayed),
+// 3. the decoder context.
+//
+// Returns ReadError::Wait when the decoder reports EAGAIN, that is
+// when it needs more input before it can produce a frame.
+auto AbstractAudioFFMpegLoader::readFromReadyContext(
+	not_null<AVCodecContext*> context)
+-> ReadResult {
+	if (_filterGraph) {
+		// Try to get already filtered samples first.
+		AvErrorWrap error = av_buffersink_get_frame(
+			_filterSink,
+			_filteredFrame.get());
+		if (!error) {
+			if (!_filteredFrame->nb_samples) {
+				return ReadError::Retry;
+			}
+			return bytes::const_span(
+				reinterpret_cast<const bytes::type*>(
+					_filteredFrame->extended_data[0]),
+				_filteredFrame->nb_samples * _outputSampleSize);
+		} else if (error.code() == AVERROR_EOF) {
+			return ReadError::EndOfFile;
+		} else if (error.code() != AVERROR(EAGAIN)) {
+			LogError(u"av_buffersink_get_frame"_q, error);
+			return ReadError::Other;
+		}
+		// EAGAIN: the filter needs more input, continue below.
+	}
+	using Enqueued = not_null<const EnqueuedFrame*>;
+	const auto queueResult = fillFrameFromQueued();
+	if (queueResult == ReadError::RetryNotQueued) {
+		return ReadError::RetryNotQueued;
+	} else if (const auto enqueued = std::get_if<Enqueued>(&queueResult)) {
+		// Replay a frame that was decoded and queued before.
+		const auto raw = (*enqueued)->frame.get();
+		Assert(frameHasDesiredFormat(raw));
+		return readOrBufferForFilter(raw, (*enqueued)->samples);
+	}
+
+	// A queued end-of-file marker is handled like decoder EOF without
+	// asking the decoder, otherwise a new frame is requested from it.
+	const auto queueError = v::get<ReadError>(queueResult);
+	AvErrorWrap error = (queueError == ReadError::EndOfFile)
+		? AVERROR_EOF
+		: avcodec_receive_frame(context, _frame.get());
+	if (!error) {
+		return readFromReadyFrame();
+	}
+
+	if (error.code() == AVERROR_EOF) {
+		enqueueFramesFinished();
+		if (!_filterGraph) {
+			return ReadError::EndOfFile;
+		}
+		// Signal end of stream to the filter graph by adding a null
+		// frame, remaining samples are read from the sink on retry.
+		AvErrorWrap error = av_buffersrc_add_frame(_filterSrc, nullptr);
+		if (!error) {
+			return ReadError::Retry;
+		}
+		LogError(u"av_buffersrc_add_frame"_q, error);
+		return ReadError::Other;
+	} else if (error.code() != AVERROR(EAGAIN)) {
+		LogError(u"avcodec_receive_frame"_q, error);
+		return ReadError::Other;
+	}
+	return ReadError::Wait;
+}
+
+// Returns the next queued frame while the queue is being replayed.
+//
+// ReadError::RetryNotQueued - the whole queue was replayed just now,
+// replaying is switched off.
+// ReadError::Wait - the queue is not being replayed.
+// ReadError::EndOfFile - the next queued entry has no frame.
+auto AbstractAudioFFMpegLoader::fillFrameFromQueued()
+-> std::variant<not_null<const EnqueuedFrame*>, ReadError> {
+	if (_framesQueuedIndex == _framesQueued.size()) {
+		_framesQueuedIndex = -1;
+		return ReadError::RetryNotQueued;
+	}
+	if (_framesQueuedIndex < 0) {
+		return ReadError::Wait;
+	}
+	const EnqueuedFrame *current = &_framesQueued[_framesQueuedIndex];
+	++_framesQueuedIndex;
+
+	if (!current->frame) {
+		return ReadError::EndOfFile;
+	}
+	return current;
+}
+
+// Checks that the sample format, the sample rate and the channel layout
+// of the frame all match the output (destination) ones.
+bool AbstractAudioFFMpegLoader::frameHasDesiredFormat(
+		not_null<AVFrame*> frame) const {
+	if (frame->format != _swrDstSampleFormat) {
+		return false;
+	} else if (frame->sample_rate != _swrDstRate) {
+		return false;
+	}
+	const auto compared = av_channel_layout_compare(
+		&frame->ch_layout,
+		&_swrDstChannelLayout);
+	return (compared == 0);
+}
+
+// Makes sure the resampler is configured for the format of the current
+// frame. An existing resampler is reused if the source format did not
+// change, otherwise it is closed and initialized again.
+// Returns false for frames with unknown layout / format or when the
+// resampler initialization fails.
+bool AbstractAudioFFMpegLoader::initResampleForFrame() {
+	if (!_frame->ch_layout.nb_channels) {
+		LOG(("Audio Error: "
+			"Unknown channel layout for frame in file '%1', "
+			"data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()
+			));
+		return false;
+	}
+	if (_frame->format == -1) {
+		LOG(("Audio Error: "
+			"Unknown frame format in file '%1', data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()
+			));
+		return false;
+	}
+	if (_swrContext) {
+		const auto unchanged = (_frame->format == _swrSrcSampleFormat)
+			&& (_frame->sample_rate == _swrSrcRate)
+			&& !av_channel_layout_compare(
+				&_frame->ch_layout,
+				&_swrSrcChannelLayout);
+		if (unchanged) {
+			return true;
+		}
+		swr_close(_swrContext);
+	}
+
+	// Remember the new source format and configure the resampler for it.
+	_swrSrcSampleFormat = static_cast<AVSampleFormat>(_frame->format);
+	av_channel_layout_copy(&_swrSrcChannelLayout, &_frame->ch_layout);
+	_swrSrcRate = _frame->sample_rate;
+	return initResampleUsingFormat();
+}
+
+// Configures and initializes the resampler using the stored source and
+// destination formats, and drops the previously used resampled frame.
+// Returns false if the resampler could not be set up.
+bool AbstractAudioFFMpegLoader::initResampleUsingFormat() {
+	const auto allocResult = swr_alloc_set_opts2(
+		&_swrContext,
+		&_swrDstChannelLayout,
+		_swrDstSampleFormat,
+		_swrDstRate,
+		&_swrSrcChannelLayout,
+		_swrSrcSampleFormat,
+		_swrSrcRate,
+		0,
+		nullptr);
+	if (allocResult || !_swrContext) {
+		LogError(u"swr_alloc_set_opts2"_q, allocResult);
+		return false;
+	}
+	if (AvErrorWrap initError = swr_init(_swrContext)) {
+		LogError(u"swr_init"_q, initError);
+		return false;
+	}
+	_resampledFrame = nullptr;
+	_resampledFrameCapacity = 0;
+	return true;
+}
+
+// Makes sure _resampledFrame has a writable buffer for at least the
+// given count of samples. The existing buffer is kept when it is large
+// enough and writable, otherwise a new one is allocated.
+// Returns false if the buffer allocation failed.
+bool AbstractAudioFFMpegLoader::ensureResampleSpaceAvailable(int samples) {
+	if (_resampledFrame) {
+		const auto tooSmall = (_resampledFrameCapacity < samples);
+		if (!tooSmall && av_frame_is_writable(_resampledFrame.get())) {
+			return true;
+		}
+		av_frame_unref(_resampledFrame.get());
+	} else {
+		_resampledFrame = FFmpeg::MakeFramePointer();
+	}
+
+	// Allocate not less than a block worth of samples rescaled to the
+	// destination rate.
+	const auto blockSamples = int(av_rescale_rnd(
+		FFmpeg::kAVBlockSize / _outputSampleSize,
+		_swrDstRate,
+		_swrSrcRate,
+		AV_ROUND_UP));
+	const auto capacity = std::max(samples, blockSamples);
+
+	const auto raw = _resampledFrame.get();
+	raw->sample_rate = _swrDstRate;
+	raw->format = _swrDstSampleFormat;
+	av_channel_layout_copy(&raw->ch_layout, &_swrDstChannelLayout);
+	raw->nb_samples = capacity;
+	if (AvErrorWrap error = av_frame_get_buffer(raw, 0)) {
+		LogError(u"av_frame_get_buffer"_q, error);
+		return false;
+	}
+	_resampledFrameCapacity = capacity;
+	return true;
+}
+
+// Recreates the speed filter for a new speed (clamped to the
+// [kSpeedMin, kSpeedMax] range).
+// Returns false if the speed did not change and true otherwise, even
+// if no filter graph exists afterwards.
+bool AbstractAudioFFMpegLoader::changeSpeedFilter(float64 speed) {
+	const auto clamped = std::clamp(speed, kSpeedMin, kSpeedMax);
+	if (EqualSpeeds(_filterSpeed, clamped)) {
+		return false;
+	}
+	avfilter_graph_free(&_filterGraph);
+	createSpeedFilter(clamped);
+	if (!_filterGraph) {
+		// No filter was created, reset the filter state to defaults.
+		_filteredFrame = nullptr;
+		_filterSpeed = 1.;
+	}
+	return true;
+}
+
+// Builds the "abuffer -> atempo -> abuffersink" filter graph used to
+// change the playback speed. Must be called without an existing graph.
+//
+// Nothing is created for the speed equal to 1. On any failure the
+// error is logged, the partially built graph is freed and _filterGraph
+// stays null. On success _filterGraph, _filteredFrame and _filterSpeed
+// are set.
+void AbstractAudioFFMpegLoader::createSpeedFilter(float64 speed) {
+	Expects(!_filterGraph);
+
+	if (EqualSpeeds(speed, 1.)) {
+		return;
+	}
+	const auto abuffer = avfilter_get_by_name("abuffer");
+	const auto abuffersink = avfilter_get_by_name("abuffersink");
+	const auto atempo = avfilter_get_by_name("atempo");
+	if (!abuffer || !abuffersink || !atempo) {
+		LOG(("FFmpeg Error: Could not find abuffer / abuffersink /atempo."));
+		return;
+	}
+
+	auto graph = avfilter_graph_alloc();
+	if (!graph) {
+		LOG(("FFmpeg Error: Unable to create filter graph."));
+		return;
+	}
+	// While 'graph' is not null the graph was not handed over to
+	// _filterGraph, so it is freed together with the filters it owns.
+	const auto guard = gsl::finally([&] {
+		if (graph) {
+			avfilter_graph_free(&graph);
+			_filterSrc = nullptr;
+			_atempo = nullptr;
+			_filterSink = nullptr;
+		}
+	});
+
+	_filterSrc = avfilter_graph_alloc_filter(graph, abuffer, "src");
+	_atempo = avfilter_graph_alloc_filter(graph, atempo, "atempo");
+	_filterSink = avfilter_graph_alloc_filter(graph, abuffersink, "sink");
+	if (!_filterSrc || !_atempo || !_filterSink) {
+		LOG(("FFmpeg Error: "
+			"Could not allocate abuffer / abuffersink /atempo."));
+		return;
+	}
+
+	// The graph works on audio in the output (destination) format.
+	char layout[64] = { 0 };
+	av_channel_layout_describe(
+		&_swrDstChannelLayout,
+		layout,
+		sizeof(layout));
+
+	av_opt_set(
+		_filterSrc,
+		"channel_layout",
+		layout,
+		AV_OPT_SEARCH_CHILDREN);
+	av_opt_set_sample_fmt(
+		_filterSrc,
+		"sample_fmt",
+		_swrDstSampleFormat,
+		AV_OPT_SEARCH_CHILDREN);
+	av_opt_set_q(
+		_filterSrc,
+		"time_base",
+		AVRational{ 1, _swrDstRate },
+		AV_OPT_SEARCH_CHILDREN);
+	av_opt_set_int(
+		_filterSrc,
+		"sample_rate",
+		_swrDstRate,
+		AV_OPT_SEARCH_CHILDREN);
+	av_opt_set_double(
+		_atempo,
+		"tempo",
+		speed,
+		AV_OPT_SEARCH_CHILDREN);
+
+	AvErrorWrap error = 0;
+	if ((error = avfilter_init_str(_filterSrc, nullptr))) {
+		LogError(u"avfilter_init_str(src)"_q, error);
+		return;
+	} else if ((error = avfilter_init_str(_atempo, nullptr))) {
+		LogError(u"avfilter_init_str(atempo)"_q, error);
+		return;
+	} else if ((error = avfilter_init_str(_filterSink, nullptr))) {
+		LogError(u"avfilter_init_str(sink)"_q, error);
+		return;
+	} else if ((error = avfilter_link(_filterSrc, 0, _atempo, 0))) {
+		LogError(u"avfilter_link(src->atempo)"_q, error);
+		return;
+	} else if ((error = avfilter_link(_atempo, 0, _filterSink, 0))) {
+		LogError(u"avfilter_link(atempo->sink)"_q, error);
+		return;
+	} else if ((error = avfilter_graph_config(graph, nullptr))) {
+		LogError(u"avfilter_graph_config"_q, error);
+		return;
+	}
+	_filterGraph = base::take(graph);
+	_filteredFrame = FFmpeg::MakeFramePointer();
+	_filterSpeed = speed;
+}
+
+// Stores a copy of the frame in the queue of decoded frames together
+// with its position. Does nothing while the queue is being replayed.
+// Zero 'samples' means "use the sample count of the frame".
+void AbstractAudioFFMpegLoader::enqueueNormalFrame(
+		not_null<AVFrame*> frame,
+		int64 samples) {
+	if (_framesQueuedIndex >= 0) {
+		return;
+	}
+	const auto count = samples ? samples : int64(frame->nb_samples);
+	_framesQueued.push_back({
+		.position = startedAtSample() + _framesQueuedSamples,
+		.samples = count,
+		.frame = FFmpeg::DuplicateFramePointer(frame),
+	});
+	_framesQueuedSamples += count;
+}
+
+// Appends an entry without a frame to the queue, such an entry is
+// reported as the end of file when the queue is replayed.
+// Does nothing while the queue is being replayed.
+void AbstractAudioFFMpegLoader::enqueueFramesFinished() {
+	const auto replaying = (_framesQueuedIndex >= 0);
+	if (!replaying) {
+		_framesQueued.push_back({
+			.position = startedAtSample() + _framesQueuedSamples,
+		});
+	}
+}
+
+// Reads samples from the current decoded frame. A frame that already
+// has the output format is used directly, any other frame is first
+// converted by the resampler into _resampledFrame.
+auto AbstractAudioFFMpegLoader::readFromReadyFrame()
+-> ReadResult {
+	const auto decoded = _frame.get();
+	if (frameHasDesiredFormat(decoded)) {
+		if (decoded->nb_samples) {
+			return readOrBufferForFilter(decoded, decoded->nb_samples);
+		}
+		return ReadError::Retry;
+	}
+	if (!initResampleForFrame()) {
+		return ReadError::Other;
+	}
+
+	// Upper bound of output samples, including the resampler delay.
+	const auto pending = swr_get_delay(_swrContext, _swrSrcRate);
+	const auto maxSamples = av_rescale_rnd(
+		pending + _frame->nb_samples,
+		_swrDstRate,
+		_swrSrcRate,
+		AV_ROUND_UP);
+	if (!ensureResampleSpaceAvailable(maxSamples)) {
+		return ReadError::Other;
+	}
+	const auto converted = swr_convert(
+		_swrContext,
+		(uint8_t**)_resampledFrame->extended_data,
+		maxSamples,
+		(const uint8_t **)_frame->extended_data,
+		_frame->nb_samples);
+	if (AvErrorWrap error = converted) {
+		LogError(u"swr_convert"_q, error);
+		return ReadError::Other;
+	}
+	if (!converted) {
+		return ReadError::Retry;
+	}
+	return readOrBufferForFilter(_resampledFrame.get(), converted);
+}
+
+// Queues the frame and then either returns its samples directly (no
+// filter graph) or feeds it to the filter graph and asks to retry, so
+// that the filtered samples are read from the sink.
+//
+// samplesOverride - count of valid samples in the frame, it may differ
+// from frame->nb_samples (for example for the resampled frame, which
+// is allocated with a larger capacity).
+auto AbstractAudioFFMpegLoader::readOrBufferForFilter(
+	not_null<AVFrame*> frame,
+	int64 samplesOverride)
+-> ReadResult {
+	enqueueNormalFrame(frame, samplesOverride);
+
+	// Temporarily expose only the valid samples of the frame, the
+	// original value is restored when the function returns.
+	const auto was = frame->nb_samples;
+	frame->nb_samples = samplesOverride;
+	const auto guard = gsl::finally([&] {
+		frame->nb_samples = was;
+	});
+
+	if (!_filterGraph) {
+		return bytes::const_span(
+			reinterpret_cast<const bytes::type*>(frame->extended_data[0]),
+			frame->nb_samples * _outputSampleSize);
+	}
+	// KEEP_REF: the filter takes its own reference, the frame stays
+	// owned by the caller.
+	AvErrorWrap error = av_buffersrc_add_frame_flags(
+		_filterSrc,
+		frame,
+		AV_BUFFERSRC_FLAG_KEEP_REF);
+	if (error) {
+		LogError(u"av_buffersrc_add_frame_flags"_q, error);
+		return ReadError::Other;
+	}
+	return ReadError::Retry;
+}
+
+// Frees the speed filter graph and the resampler if they were created.
+AbstractAudioFFMpegLoader::~AbstractAudioFFMpegLoader() {
+	if (_filterGraph != nullptr) {
+		avfilter_graph_free(&_filterGraph);
+	}
+	if (_swrContext != nullptr) {
+		swr_free(&_swrContext);
+	}
+}
+
+// Only forwards the source description to the base loader.
+FFMpegLoader::FFMpegLoader(
+	const Core::FileLocation &file,
+	const QByteArray &data,
+	bytes::vector &&buffer)
+: AbstractAudioFFMpegLoader(file, data, std::move(buffer)) {
+}
+
+// Opens the source, the decoder and the output format, then seeks to
+// the requested position. Stops at the first step that fails.
+bool FFMpegLoader::open(crl::time positionMs, float64 speed) {
+	if (!AbstractFFMpegLoader::open(positionMs)) {
+		return false;
+	} else if (!openCodecContext()) {
+		return false;
+	} else if (!initUsingContext(_codecContext, speed)) {
+		return false;
+	}
+	return seekTo(positionMs);
+}
+
+// Allocates the decoder context, fills it from the parameters of the
+// chosen audio stream and opens the decoder.
+// Returns false if any of these steps failed.
+bool FFMpegLoader::openCodecContext() {
+	_codecContext = avcodec_alloc_context3(nullptr);
+	if (!_codecContext) {
+		LOG(("Audio Error: "
+			"Unable to avcodec_alloc_context3 for file '%1', data size '%2'"
+			).arg(_file.name()
+			).arg(_data.size()
+			));
+		return false;
+	}
+
+	const auto stream = fmtContext->streams[streamId];
+	if (AvErrorWrap copyError = avcodec_parameters_to_context(
+			_codecContext,
+			stream->codecpar)) {
+		LogError(u"avcodec_parameters_to_context"_q, copyError);
+		return false;
+	}
+	_codecContext->pkt_timebase = stream->time_base;
+	av_opt_set_int(_codecContext, "refcounted_frames", 1, 0);
+
+	if (AvErrorWrap openError = avcodec_open2(_codecContext, codec, 0)) {
+		LogError(u"avcodec_open2"_q, openError);
+		return false;
+	}
+	return true;
+}
+
+// Seeks the audio stream to the given position in milliseconds.
+// Seeking is best-effort: a failed seek is ignored and the function
+// always returns true.
+bool FFMpegLoader::seekTo(crl::time positionMs) {
+	if (!positionMs) {
+		return true;
+	}
+	const auto stream = fmtContext->streams[streamId];
+	const auto timeBase = stream->time_base;
+	const auto timeStamp = (positionMs * timeBase.den)
+		/ (1000LL * timeBase.num);
+	if (av_seek_frame(fmtContext, streamId, timeStamp, AVSEEK_FLAG_ANY) < 0) {
+		// Try once more with the default flags, the result is ignored.
+		av_seek_frame(fmtContext, streamId, timeStamp, 0);
+	}
+	return true;
+}
+
+// Produces the next read result. First tries to read from what is
+// already decoded / queued / filtered, and when the decoder waits for
+// input reads one more packet from the source and sends it to the
+// decoder, returning ReadError::Retry so that the caller reads again.
+FFMpegLoader::ReadResult FFMpegLoader::readMore() {
+	if (_readTillEnd) {
+		return ReadError::EndOfFile;
+	}
+	const auto readResult = readFromReadyContext(_codecContext);
+	if (readResult != ReadError::Wait) {
+		if (readResult == ReadError::EndOfFile) {
+			// Remember that the end was reached for the next calls.
+			_readTillEnd = true;
+		}
+		return readResult;
+	}
+
+	if (AvErrorWrap error = av_read_frame(fmtContext, &_packet)) {
+		if (error.code() != AVERROR_EOF) {
+			LogError(u"av_read_frame"_q, error);
+			return ReadError::Other;
+		}
+		// No more packets in the source, the remaining frames are
+		// received from the decoder on the following calls.
+		error = avcodec_send_packet(_codecContext, nullptr); // drain
+		if (!error) {
+			return ReadError::Retry;
+		}
+		LogError(u"avcodec_send_packet"_q, error);
+		return ReadError::Other;
+	}
+
+	// Packets of other streams are skipped.
+	if (_packet.stream_index == streamId) {
+		AvErrorWrap error = avcodec_send_packet(_codecContext, &_packet);
+		if (error) {
+			av_packet_unref(&_packet);
+			LogError(u"avcodec_send_packet"_q, error);
+			// There is a sample voice message where skipping such packet
+			// results in a crash (read_access to nullptr) in swr_convert().
+			//if (error.code() == AVERROR_INVALIDDATA) {
+			//	return ReadResult::Retry; // try to skip bad packet
+			//}
+			return ReadError::Other;
+		}
+	}
+	av_packet_unref(&_packet);
+	return ReadError::Retry;
+}
+
+// Frees the decoder context if it was allocated.
+FFMpegLoader::~FFMpegLoader() {
+	if (_codecContext != nullptr) {
+		avcodec_free_context(&_codecContext);
+	}
+}
+
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_ffmpeg_loader.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_ffmpeg_loader.h
@@ -0,0 +1,197 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "media/audio/media_audio.h"
+#include "media/audio/media_audio_loader.h"
+#include "media/streaming/media_streaming_utility.h"
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/opt.h>
+#include <libswresample/swresample.h>
+#include <libavfilter/avfilter.h>
+} // extern "C"
+
+#include <al.h>
+
+namespace Core {
+class FileLocation;
+} // namespace Core
+
+namespace Media {
+
+// Base class for FFmpeg-backed loaders. It keeps the FFmpeg I/O and
+// format contexts, the selected codec and stream id, and the duration
+// and frequency values reported to the player.
+class AbstractFFMpegLoader : public AudioPlayerLoader {
+public:
+	// Forwards the data source (file location, byte array or bytes
+	// buffer) to AudioPlayerLoader.
+	AbstractFFMpegLoader(
+		const Core::FileLocation &file,
+		const QByteArray &data,
+		bytes::vector &&buffer)
+	: AudioPlayerLoader(file, data, std::move(buffer)) {
+	}
+
+	bool open(crl::time positionMs, float64 speed = 1.) override;
+
+	// Returns the stored duration value.
+	crl::time duration() override {
+		return _duration;
+	}
+	// Replaces the stored start sample and duration with given values.
+	void overrideDuration(int64 startedAtSample, crl::time duration) {
+		_startedAtSample = startedAtSample;
+		_duration = duration;
+	}
+
+	// Returns the stored samples frequency.
+	int samplesFrequency() override {
+		return _samplesFrequency;
+	}
+
+	[[nodiscard]] int64 startedAtSample() const {
+		return _startedAtSample;
+	}
+
+	~AbstractFFMpegLoader();
+
+protected:
+	// NOTE(review): presumably multiplies value by the rational,
+	// the definition is not visible here - confirm.
+	static int64 Mul(int64 value, AVRational rational);
+
+	// Defaults to Media::Player::kDefaultFrequency.
+	int _samplesFrequency = Media::Player::kDefaultFrequency;
+	int64 _startedAtSample = 0;
+	crl::time _duration = 0;
+
+	// FFmpeg demuxing state shared with derived loaders.
+	uchar *ioBuffer = nullptr;
+	AVIOContext *ioContext = nullptr;
+	AVFormatContext *fmtContext = nullptr;
+	const AVCodec *codec = nullptr;
+	int32 streamId = 0;
+
+	bool _opened = false;
+
+private:
+	// Read / seek callbacks taking an opaque pointer.
+	// NOTE(review): presumably one pair per data source kind
+	// (QByteArray data, bytes buffer, file) - confirm in the .cpp.
+	static int ReadData(void *opaque, uint8_t *buf, int buf_size);
+	static int64_t SeekData(void *opaque, int64_t offset, int whence);
+	static int ReadBytes(void *opaque, uint8_t *buf, int buf_size);
+	static int64_t SeekBytes(void *opaque, int64_t offset, int whence);
+	static int ReadFile(void *opaque, uint8_t *buf, int buf_size);
+	static int64_t SeekFile(void *opaque, int64_t offset, int whence);
+
+};
+
+// Audio-specific FFmpeg loader base. It holds the decoded / resampled /
+// filtered frames, a queue of enqueued frames, the output format
+// description, the resampler context and the speed filter graph.
+class AbstractAudioFFMpegLoader : public AbstractFFMpegLoader {
+public:
+	AbstractAudioFFMpegLoader(
+		const Core::FileLocation &file,
+		const QByteArray &data,
+		bytes::vector &&buffer);
+
+	void dropFramesTill(int64 samples) override;
+	int64 startReadingQueuedFrames(float64 newSpeed) override;
+
+	// Reports the resampler destination rate (a constant member),
+	// not the base class _samplesFrequency.
+	int samplesFrequency() override {
+		return _swrDstRate;
+	}
+
+	// Returns the stored output sample size
+	// (initialized below as 2 * sizeof(uint16)).
+	int sampleSize() override {
+		return _outputSampleSize;
+	}
+
+	// Returns the stored output format (AL_FORMAT_STEREO16 by default).
+	int format() override {
+		return _outputFormat;
+	}
+
+	~AbstractAudioFFMpegLoader();
+
+protected:
+	bool initUsingContext(not_null<AVCodecContext*> context, float64 speed);
+	[[nodiscard]] ReadResult readFromReadyContext(
+		not_null<AVCodecContext*> context);
+
+	// Streaming player provides the first frame to the ChildFFMpegLoader
+	// so we replace our allocated frame with the one provided.
+	[[nodiscard]] ReadResult replaceFrameAndRead(FFmpeg::FramePointer frame);
+
+private:
+	// A queued frame together with its position and samples count.
+	struct EnqueuedFrame {
+		int64 position = 0;
+		int64 samples = 0;
+		FFmpeg::FramePointer frame;
+	};
+	[[nodiscard]] ReadResult readFromReadyFrame();
+	[[nodiscard]] ReadResult readOrBufferForFilter(
+		not_null<AVFrame*> frame,
+		int64 samplesOverride);
+	bool frameHasDesiredFormat(not_null<AVFrame*> frame) const;
+	bool initResampleForFrame();
+	bool initResampleUsingFormat();
+	bool ensureResampleSpaceAvailable(int samples);
+
+	bool changeSpeedFilter(float64 speed);
+	void createSpeedFilter(float64 speed);
+
+	void enqueueNormalFrame(
+		not_null<AVFrame*> frame,
+		int64 samples = 0);
+	void enqueueFramesFinished();
+	// Returns either a pointer to a queued frame or a ReadError.
+	[[nodiscard]] auto fillFrameFromQueued()
+		-> std::variant<not_null<const EnqueuedFrame*>, ReadError>;
+
+	FFmpeg::FramePointer _frame;
+	FFmpeg::FramePointer _resampledFrame;
+	FFmpeg::FramePointer _filteredFrame;
+	int _resampledFrameCapacity = 0;
+
+	// Frames queue state, the index starts at -1.
+	int64 _framesQueuedSamples = 0;
+	std::deque<EnqueuedFrame> _framesQueued;
+	int _framesQueuedIndex = -1;
+
+	// Output description: 16 bit stereo by default.
+	int _outputFormat = AL_FORMAT_STEREO16;
+	int _outputChannels = 2;
+	int _outputSampleSize = 2 * sizeof(uint16);
+
+	SwrContext *_swrContext = nullptr;
+
+	// Resampler source parameters.
+	int _swrSrcRate = 0;
+	AVSampleFormat _swrSrcSampleFormat = AV_SAMPLE_FMT_NONE;
+
+	// Resampler destination parameters.
+	const int _swrDstRate = Media::Player::kDefaultFrequency;
+	AVSampleFormat _swrDstSampleFormat = AV_SAMPLE_FMT_S16;
+
+	AVChannelLayout _swrSrcChannelLayout = AV_CHANNEL_LAYOUT_STEREO;
+	AVChannelLayout _swrDstChannelLayout = AV_CHANNEL_LAYOUT_STEREO;
+
+	// Speed filter graph state, the filter speed is 1. by default.
+	AVFilterGraph *_filterGraph = nullptr;
+	float64 _filterSpeed = 1.;
+	AVFilterContext *_filterSrc = nullptr;
+	AVFilterContext *_atempo = nullptr;
+	AVFilterContext *_filterSink = nullptr;
+
+};
+
+// Concrete loader that owns its codec context and reads packets itself
+// (see readMore(), which calls av_read_frame on the format context).
+class FFMpegLoader : public AbstractAudioFFMpegLoader {
+public:
+	FFMpegLoader(
+		const Core::FileLocation &file,
+		const QByteArray &data,
+		bytes::vector &&buffer);
+
+	bool open(crl::time positionMs, float64 speed = 1.) override;
+
+	ReadResult readMore() override;
+
+	// Frees the codec context if it was allocated.
+	~FFMpegLoader();
+
+private:
+	bool openCodecContext();
+	// Seeks the format context to the given position in milliseconds,
+	// the implementation always returns true.
+	bool seekTo(crl::time positionMs);
+
+	AVCodecContext *_codecContext = nullptr;
+	// Packet storage reused between readMore() calls.
+	AVPacket _packet;
+	// Set once the decoder reported EndOfFile.
+	bool _readTillEnd = false;
+
+};
+
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_loader.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_loader.cpp
@@ -0,0 +1,97 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_loader.h"
+
+namespace Media {
+
+// Stores the possible data sources: a copy of the file location, a copy
+// of the byte array and the bytes buffer, which is moved in.
+AudioPlayerLoader::AudioPlayerLoader(
+	const Core::FileLocation &file,
+	const QByteArray &data,
+	bytes::vector &&buffer)
+: _file(file)
+, _data(data)
+, _bytes(std::move(buffer)) {
+}
+
+// Gives back the file access that openFile() has enabled, if any.
+AudioPlayerLoader::~AudioPlayerLoader() {
+	if (!_access) {
+		return;
+	}
+	_file.accessDisable();
+	_access = false;
+}
+
+// Tells whether this loader was created for the same source: the file
+// locations must be equal and the data arrays must have equal sizes
+// (the data contents are not compared).
+bool AudioPlayerLoader::check(
+		const Core::FileLocation &file,
+		const QByteArray &data) {
+	if (!(_file == file)) {
+		return false;
+	}
+	return _data.size() == data.size();
+}
+
+// Takes the decoded samples out of the passed array and keeps them
+// inside the loader. Nothing may be saved at the moment of the call.
+void AudioPlayerLoader::saveDecodedSamples(not_null<QByteArray*> samples) {
+	Expects(_savedSamples.isEmpty());
+	Expects(!_holdsSavedSamples);
+
+	_holdsSavedSamples = true;
+	_savedSamples.swap(*samples);
+}
+
+// Hands the previously saved samples over to the passed (empty) array
+// and marks the loader as not holding any saved samples.
+void AudioPlayerLoader::takeSavedDecodedSamples(
+		not_null<QByteArray*> samples) {
+	Expects(samples->isEmpty());
+	Expects(_holdsSavedSamples);
+
+	_holdsSavedSamples = false;
+	_savedSamples.swap(*samples);
+}
+
+// Returns true between saveDecodedSamples() and the following
+// takeSavedDecodedSamples() / dropDecodedSamples() call.
+bool AudioPlayerLoader::holdsSavedDecodedSamples() const {
+	return _holdsSavedSamples;
+}
+
+// Forgets any saved samples and resets the "holds samples" flag.
+void AudioPlayerLoader::dropDecodedSamples() {
+	_holdsSavedSamples = false;
+	_savedSamples = QByteArray();
+}
+
+// Returns samplesFrequency() * sampleSize(). The value is computed on
+// first use (while the cached value is zero) and then reused.
+int AudioPlayerLoader::bytesPerBuffer() {
+	if (_bytesPerBuffer) {
+		return _bytesPerBuffer;
+	}
+	_bytesPerBuffer = samplesFrequency() * sampleSize();
+	return _bytesPerBuffer;
+}
+
+// Prepares the data source for reading from the start.
+// When neither the byte array nor the bytes buffer has content, the
+// file is (re)opened for reading, enabling file access first if needed.
+// Returns false (after logging) if access or opening fails.
+bool AudioPlayerLoader::openFile() {
+	if (!_data.isEmpty() || !_bytes.empty()) {
+		// In-memory source, only the read position is reset.
+		_dataPos = 0;
+		return true;
+	}
+
+	if (_f.isOpen()) {
+		_f.close();
+	}
+	if (!_access && !_file.accessEnable()) {
+		LOG(("Audio Error: could not open file access '%1', "
+			"data size '%2', error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(_f.error()
+			).arg(_f.errorString()));
+		return false;
+	}
+	_access = true;
+
+	_f.setFileName(_file.name());
+	if (!_f.open(QIODevice::ReadOnly)) {
+		LOG(("Audio Error: could not open file '%1', "
+			"data size '%2', error %3, %4"
+			).arg(_file.name()
+			).arg(_data.size()
+			).arg(_f.error()
+			).arg(_f.errorString()));
+		return false;
+	}
+
+	_dataPos = 0;
+	return true;
+}
+
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_loader.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_loader.h
@@ -0,0 +1,87 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "base/bytes.h"
+#include "core/file_location.h"
+#include "media/streaming/media_streaming_utility.h"
+
+namespace Media {
+
+// Abstract interface of a sound loader used by the audio player.
+// It stores the data source (file location / byte array / bytes buffer),
+// lets the caller temporarily save already decoded samples, and declares
+// the virtual methods for opening the source and reading samples.
+class AudioPlayerLoader {
+public:
+	AudioPlayerLoader(
+		const Core::FileLocation &file,
+		const QByteArray &data,
+		bytes::vector &&buffer);
+	virtual ~AudioPlayerLoader();
+
+	// Checks that the loader was created for the given file and data.
+	virtual bool check(
+		const Core::FileLocation &file,
+		const QByteArray &data);
+
+	virtual bool open(crl::time positionMs, float64 speed = 1.) = 0;
+	virtual crl::time duration() = 0;
+	virtual int samplesFrequency() = 0;
+	virtual int sampleSize() = 0;
+	virtual int format() = 0;
+
+	// Does nothing by default.
+	virtual void dropFramesTill(int64 samples) {
+	}
+	// The default implementation must not be called, it is Unexpected().
+	[[nodiscard]] virtual int64 startReadingQueuedFrames(float64 newSpeed) {
+		Unexpected(
+			"startReadingQueuedFrames() on not AbstractAudioFFMpegLoader");
+	}
+
+	// samplesFrequency() * sampleSize(), cached after the first call.
+	[[nodiscard]] int bytesPerBuffer();
+
+	// Non-data outcomes of readMore().
+	// NOTE(review): exact semantics are defined by the implementations,
+	// confirm against the concrete loaders.
+	enum class ReadError {
+		Other,
+		Retry,
+		RetryNotQueued,
+		Wait,
+		EndOfFile,
+	};
+	// Either a span of decoded bytes or one of the ReadError values.
+	using ReadResult = std::variant<bytes::const_span, ReadError>;
+	[[nodiscard]] virtual ReadResult readMore() = 0;
+
+	// The two defaults below must not be called, they are Unexpected().
+	virtual void enqueuePackets(std::deque<FFmpeg::Packet> &&packets) {
+		Unexpected("enqueuePackets() call on not ChildFFMpegLoader.");
+	}
+	virtual void setForceToBuffer(bool force) {
+		Unexpected("setForceToBuffer() call on not ChildFFMpegLoader.");
+	}
+	// Returns false by default.
+	virtual bool forceToBuffer() const {
+		return false;
+	}
+
+	// Temporary storage for already decoded samples.
+	void saveDecodedSamples(not_null<QByteArray*> samples);
+	void takeSavedDecodedSamples(not_null<QByteArray*> samples);
+	bool holdsSavedDecodedSamples() const;
+	void dropDecodedSamples();
+
+protected:
+	Core::FileLocation _file;
+	// True while file access was enabled by openFile().
+	bool _access = false;
+	QByteArray _data;
+	bytes::vector _bytes;
+
+	QFile _f;
+	// Reset to zero by openFile().
+	int _dataPos = 0;
+
+	bool openFile();
+
+private:
+	QByteArray _savedSamples;
+	bool _holdsSavedSamples = false;
+
+	// Cached result of bytesPerBuffer(), zero means "not computed yet".
+	int _bytesPerBuffer = 0;
+
+};
+
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_loaders.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_loaders.cpp
@@ -0,0 +1,587 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_loaders.h"
+
+#include "media/audio/media_audio.h"
+#include "media/audio/media_audio_ffmpeg_loader.h"
+#include "media/audio/media_child_ffmpeg_loader.h"
+#include "media/media_common.h"
+
+namespace Media {
+namespace Player {
+
+// Creates the loaders object bound to the given thread.
+// The notifier is set up to invoke videoSoundAdded().
+Loaders::Loaders(QThread *thread)
+: _fromExternalNotify([=] { videoSoundAdded(); }) {
+	// Both this object and the notifier are moved to the thread.
+	moveToThread(thread);
+	_fromExternalNotify.moveToThread(thread);
+	// onInit() is called when the thread starts, and the object is
+	// scheduled for deletion when the thread finishes.
+	connect(thread, SIGNAL(started()), this, SLOT(onInit()));
+	connect(thread, SIGNAL(finished()), this, SLOT(deleteLater()));
+}
+
+// Appends externally provided packets to the pending queue of their
+// audio id. The notifier is called only if nothing was pending before,
+// and it is called after the mutex has been released.
+void Loaders::feedFromExternal(ExternalSoundPart &&part) {
+	const auto wasIdle = [&] {
+		QMutexLocker lock(&_fromExternalMutex);
+		const auto idle = _fromExternalQueues.empty()
+			&& _fromExternalForceToBuffer.empty();
+		auto &pending = _fromExternalQueues[part.audio];
+		pending.insert(
+			pending.end(),
+			std::make_move_iterator(part.packets.begin()),
+			std::make_move_iterator(part.packets.end()));
+		return idle;
+	}();
+	if (wasIdle) {
+		_fromExternalNotify.call();
+	}
+}
+
+// Remembers a "force to buffer" request for the given audio id.
+// The notifier is called only if nothing was pending before, and it is
+// called after the mutex has been released.
+void Loaders::forceToBufferExternal(const AudioMsgId &audioId) {
+	const auto wasIdle = [&] {
+		QMutexLocker lock(&_fromExternalMutex);
+		const auto idle = _fromExternalQueues.empty()
+			&& _fromExternalForceToBuffer.empty();
+		_fromExternalForceToBuffer.emplace(audioId);
+		return idle;
+	}();
+	if (wasIdle) {
+		_fromExternalNotify.call();
+	}
+}
+
+// Processes everything accumulated by feedFromExternal() and
+// forceToBufferExternal(): takes the pending data under the mutex and
+// then applies it to the matching loader (voice, song or video).
+void Loaders::videoSoundAdded() {
+	auto pendingPackets = decltype(_fromExternalQueues)();
+	auto pendingForces = decltype(_fromExternalForceToBuffer)();
+	{
+		QMutexLocker lock(&_fromExternalMutex);
+		pendingPackets = base::take(_fromExternalQueues);
+		pendingForces = base::take(_fromExternalForceToBuffer);
+	}
+
+	// Force requests first. Loading is triggered here only when the
+	// loader holds saved samples and no packets are pending for it.
+	for (const auto &forcedId : pendingForces) {
+		const auto force = [&](const auto &id, auto &loader) {
+			if (!(forcedId == id) || !loader) {
+				return false;
+			}
+			loader->setForceToBuffer(true);
+			const auto reload = loader->holdsSavedDecodedSamples()
+				&& !pendingPackets.contains(forcedId);
+			if (reload) {
+				loadData(forcedId);
+			}
+			return true;
+		};
+		if (!force(_audio, _audioLoader) && !force(_song, _songLoader)) {
+			force(_video, _videoLoader);
+		}
+	}
+
+	// Then hand over the packets and continue loading if the loader
+	// holds saved samples.
+	for (auto &entry : pendingPackets) {
+		const auto targetId = entry.first;
+		auto &packets = entry.second;
+		const auto feed = [&](const auto &id, auto &loader) {
+			if (!(id == targetId) || !loader) {
+				return false;
+			}
+			loader->enqueuePackets(std::move(packets));
+			if (loader->holdsSavedDecodedSamples()) {
+				loadData(targetId);
+			}
+			return true;
+		};
+		if (!feed(_audio, _audioLoader) && !feed(_song, _songLoader)) {
+			feed(_video, _videoLoader);
+		}
+	}
+}
+
+// Slot connected to the thread started() signal, intentionally empty.
+void Loaders::onInit() {
+}
+
+// Starts loading of the given audio from positionMs: drops the current
+// loader of that type, marks the track as loading (under the player
+// mutex) and loads the first portion of data.
+void Loaders::onStart(const AudioMsgId &audio, qint64 positionMs) {
+	const auto type = audio.type();
+	clear(type);
+	{
+		QMutexLocker lock(internal::audioPlayerMutex());
+		const auto track = mixer()
+			? mixer()->trackForType(type)
+			: nullptr;
+		if (!track) {
+			return;
+		}
+		track->loading = true;
+	}
+
+	loadData(audio, positionMs);
+}
+
+// Destroys the loader of the given type and resets its stored id.
+// Returns the id that was stored before (a default one for a type
+// without a loader slot).
+AudioMsgId Loaders::clear(AudioMsgId::Type type) {
+	AudioMsgId result;
+	const auto release = [&](
+			AudioMsgId &id,
+			std::unique_ptr<AudioPlayerLoader> &loader) {
+		std::swap(result, id);
+		loader.reset();
+	};
+	switch (type) {
+	case AudioMsgId::Type::Voice: release(_audio, _audioLoader); break;
+	case AudioMsgId::Type::Song: release(_song, _songLoader); break;
+	case AudioMsgId::Type::Video: release(_video, _videoLoader); break;
+	}
+	return result;
+}
+
+// Forwards to Mixer::setStoppedState(). The mixer() result is used
+// without a null check here.
+void Loaders::setStoppedState(Mixer::Track *track, State state) {
+	mixer()->setStoppedState(track, state);
+}
+
+void Loaders::emitError(AudioMsgId::Type type) {
+	error(clear(type));
+}
+
+// Slot: loads the next portion of data for the given audio
+// (loadData() is called with its default zero position).
+void Loaders::onLoad(const AudioMsgId &audio) {
+	loadData(audio);
+}
+
+// Loads one more buffer of decoded samples for the given audio and
+// queues it for playback.
+//
+// Steps: prepare the loader (setupLoader), rebuffer if the speed has
+// changed, read from the loader until bytesPerBuffer() bytes are
+// accumulated (or reading ends / has to wait), then, under the player
+// mutex, upload the samples to a free OpenAL buffer, queue it to the
+// track source and start playing the source if it is not playing.
+// OpenAL failures stop the track and emit the error() signal.
+void Loaders::loadData(AudioMsgId audio, crl::time positionMs) {
+	auto type = audio.type();
+	auto setup = setupLoader(audio, positionMs);
+	const auto l = setup.loader;
+	if (!l) {
+		// No loader: report the error only if opening has just failed.
+		if (setup.errorAtStart) {
+			emitError(type);
+		}
+		return;
+	}
+
+	const auto sampleSize = l->sampleSize();
+	const auto speedChanged = !EqualSpeeds(setup.newSpeed, setup.oldSpeed);
+	auto updatedWithSpeed = speedChanged
+		? rebufferOnSpeedChange(setup)
+		: std::optional<Mixer::Track::WithSpeed>();
+	if (!speedChanged && setup.oldSpeed > 0.) {
+		// Same speed: let the loader drop frames before the position.
+		const auto normalPosition = Mixer::Track::SpeedIndependentPosition(
+			setup.position,
+			setup.oldSpeed);
+		l->dropFramesTill(normalPosition);
+	}
+
+	const auto started = setup.justStarted;
+	auto finished = false;
+	auto waiting = false;
+	auto errAtStart = started;
+
+	// Continue from the samples saved by a previous call, if any.
+	auto accumulated = QByteArray();
+	auto accumulatedCount = 0;
+	if (l->holdsSavedDecodedSamples()) {
+		l->takeSavedDecodedSamples(&accumulated);
+		accumulatedCount = accumulated.size() / sampleSize;
+	}
+	const auto accumulateTill = l->bytesPerBuffer();
+	while (accumulated.size() < accumulateTill) {
+		using Error = AudioPlayerLoader::ReadError;
+		const auto result = l->readMore();
+		if (result == Error::Retry) {
+			continue;
+		}
+		const auto sampleBytes = v::is<bytes::const_span>(result)
+			? v::get<bytes::const_span>(result)
+			: bytes::const_span();
+		if (!sampleBytes.empty()) {
+			accumulated.append(
+				reinterpret_cast<const char*>(sampleBytes.data()),
+				sampleBytes.size());
+			accumulatedCount += sampleBytes.size() / sampleSize;
+		} else if (result == Error::Other) {
+			if (errAtStart) {
+				// A failure while errAtStart is still set fails the
+				// whole playback start.
+				{
+					QMutexLocker lock(internal::audioPlayerMutex());
+					if (auto track = checkLoader(type)) {
+						track->state.state = State::StoppedAtStart;
+					}
+				}
+				emitError(type);
+				return;
+			}
+			// Otherwise the error is treated as the end of the data.
+			finished = true;
+			break;
+		} else if (result == Error::EndOfFile) {
+			finished = true;
+			break;
+		} else if (result == Error::Wait) {
+			// Not enough data yet. Keep what we have for the next call,
+			// unless the loader is forced to buffer a non-empty part.
+			waiting = (accumulated.size() < accumulateTill)
+				&& (accumulated.isEmpty() || !l->forceToBuffer());
+			if (waiting) {
+				l->saveDecodedSamples(&accumulated);
+			}
+			break;
+		} else if (v::is<bytes::const_span>(result)) {
+			// Reached only for an empty span (non-empty is handled above).
+			errAtStart = false;
+		}
+
+		// Stop if the playing track has changed while we were reading.
+		QMutexLocker lock(internal::audioPlayerMutex());
+		if (!checkLoader(type)) {
+			clear(type);
+			return;
+		}
+	}
+
+	// Everything below works with the track under the player mutex.
+	QMutexLocker lock(internal::audioPlayerMutex());
+	auto track = checkLoader(type);
+	if (!track) {
+		clear(type);
+		return;
+	}
+
+	if (started || !accumulated.isEmpty() || updatedWithSpeed) {
+		Audio::AttachToDevice();
+	}
+	if (started) {
+		Assert(!updatedWithSpeed);
+		track->started();
+		if (!internal::audioCheckError()) {
+			setStoppedState(track, State::StoppedAtStart);
+			emitError(type);
+			return;
+		}
+
+		track->format = l->format();
+		track->state.frequency = l->samplesFrequency();
+
+		// Convert the start position from milliseconds to samples.
+		track->state.position = (positionMs * track->state.frequency)
+			/ 1000LL;
+		track->updateWithSpeedPosition();
+		track->withSpeed.bufferedPosition = track->withSpeed.position;
+		track->withSpeed.fadeStartPosition = track->withSpeed.position;
+	} else if (updatedWithSpeed) {
+		// Speed change: swap in the rebuffered data and detach the
+		// old stream after the track was reattached.
+		auto old = Mixer::Track();
+		old.stream = base::take(track->stream);
+		old.withSpeed = std::exchange(track->withSpeed, *updatedWithSpeed);
+		track->speed = setup.newSpeed;
+		track->reattach(type);
+		old.detach();
+	}
+	if (!accumulated.isEmpty()) {
+		track->ensureStreamCreated(type);
+
+		auto bufferIndex = track->getNotQueuedBufferIndex();
+
+		if (!internal::audioCheckError()) {
+			setStoppedState(track, State::StoppedAtError);
+			emitError(type);
+			return;
+		}
+
+		if (bufferIndex < 0) { // No free buffers, wait.
+			track->waitingForBuffer = true;
+			l->saveDecodedSamples(&accumulated);
+			return;
+		} else if (l->forceToBuffer()) {
+			l->setForceToBuffer(false);
+		}
+		track->waitingForBuffer = false;
+
+		// Remember the samples and upload them to the OpenAL buffer.
+		track->withSpeed.buffered[bufferIndex] = accumulated;
+		track->withSpeed.samples[bufferIndex] = accumulatedCount;
+		track->withSpeed.bufferedLength += accumulatedCount;
+		alBufferData(
+			track->stream.buffers[bufferIndex],
+			track->format,
+			accumulated.constData(),
+			accumulated.size(),
+			track->state.frequency);
+
+		alSourceQueueBuffers(
+			track->stream.source,
+			1,
+			track->stream.buffers + bufferIndex);
+
+		if (!internal::audioCheckError()) {
+			setStoppedState(track, State::StoppedAtError);
+			emitError(type);
+			return;
+		}
+	} else {
+		// Nothing was read: either we wait for data or we are done.
+		if (waiting) {
+			return;
+		}
+		finished = true;
+	}
+	track->state.waitingForData = false;
+
+	if (finished) {
+		// The full length is known now, in both position scales.
+		track->loaded = true;
+		track->withSpeed.length = track->withSpeed.bufferedPosition
+			+ track->withSpeed.bufferedLength;
+		track->state.length = Mixer::Track::SpeedIndependentPosition(
+			track->withSpeed.length,
+			track->speed);
+	}
+
+	track->loading = false;
+	if (IsPausedOrPausing(track->state.state)
+		|| IsStoppedOrStopping(track->state.state)) {
+		return;
+	}
+	ALint state = AL_INITIAL;
+	alGetSourcei(track->stream.source, AL_SOURCE_STATE, &state);
+	if (!internal::audioCheckError()) {
+		setStoppedState(track, State::StoppedAtError);
+		emitError(type);
+		return;
+	}
+
+	if (state == AL_PLAYING) {
+		return;
+	} else if (state == AL_STOPPED && !internal::CheckAudioDeviceConnected()) {
+		return;
+	}
+
+	alSourcef(track->stream.source, AL_GAIN, ComputeVolume(type));
+	if (!internal::audioCheckError()) {
+		setStoppedState(track, State::StoppedAtError);
+		emitError(type);
+		return;
+	}
+
+	if (state == AL_STOPPED) {
+		// Resume a stopped source from the offset inside the buffered
+		// part, clamped to be non-negative.
+		alSourcei(
+			track->stream.source,
+			AL_SAMPLE_OFFSET,
+			qMax(track->withSpeed.position - track->withSpeed.bufferedPosition, 0LL));
+		if (!internal::audioCheckError()) {
+			setStoppedState(track, State::StoppedAtError);
+			emitError(type);
+			return;
+		}
+	}
+	alSourcePlay(track->stream.source);
+	if (!internal::audioCheckError()) {
+		setStoppedState(track, State::StoppedAtError);
+		emitError(type);
+		return;
+	}
+
+	needToCheck();
+}
+
+// Finds or creates the loader for the given audio, under the player
+// mutex. The result describes what loadData() should do:
+// - an empty result (no loader, no errorAtStart) means "do nothing";
+// - errorAtStart is set when a new loader failed to open or reported a
+//   non-positive duration;
+// - justStarted is set when a new loader was created and opened;
+// - otherwise the existing loader is returned together with the old and
+//   new speeds and the current positions of the track.
+Loaders::SetupLoaderResult Loaders::setupLoader(
+		const AudioMsgId &audio,
+		crl::time positionMs) {
+	QMutexLocker lock(internal::audioPlayerMutex());
+	if (!mixer()) {
+		return {};
+	}
+
+	// The track must still play this audio and must be marked as loading.
+	auto track = mixer()->trackForType(audio.type());
+	if (!track || track->state.id != audio || !track->loading) {
+		error(audio);
+		LOG(("Audio Error: trying to load part of audio, that is not current at the moment"));
+		return {};
+	}
+
+	bool isGoodId = false;
+	AudioPlayerLoader *l = nullptr;
+	switch (audio.type()) {
+	case AudioMsgId::Type::Voice: l = _audioLoader.get(); isGoodId = (_audio == audio); break;
+	case AudioMsgId::Type::Song: l = _songLoader.get(); isGoodId = (_song == audio); break;
+	case AudioMsgId::Type::Video: l = _videoLoader.get(); isGoodId = (_video == audio); break;
+	}
+
+	// An existing loader for another id or another source is dropped.
+	if (l && (!isGoodId || !l->check(track->file, track->data))) {
+		clear(audio.type());
+		l = nullptr;
+	}
+
+	auto SpeedDependentPosition = Mixer::Track::SpeedDependentPosition;
+	if (!l) {
+		std::unique_ptr<AudioPlayerLoader> *loader = nullptr;
+		switch (audio.type()) {
+		case AudioMsgId::Type::Voice: _audio = audio; loader = &_audioLoader; break;
+		case AudioMsgId::Type::Song: _song = audio; loader = &_songLoader; break;
+		case AudioMsgId::Type::Video: _video = audio; loader = &_videoLoader; break;
+		}
+
+		// Audio with an external play id uses the external data stored
+		// in the track, everything else is read by FFMpegLoader.
+		if (audio.externalPlayId()) {
+			if (!track->externalData) {
+				clear(audio.type());
+				track->state.state = State::StoppedAtError;
+				error(audio);
+				LOG(("Audio Error: video sound data not ready"));
+				return {};
+			}
+			*loader = std::make_unique<ChildFFMpegLoader>(
+				std::move(track->externalData));
+		} else {
+			*loader = std::make_unique<FFMpegLoader>(
+				track->file,
+				track->data,
+				bytes::vector());
+		}
+		l = loader->get();
+
+		// A new loader starts with the requested speed right away.
+		track->speed = track->nextSpeed;
+		if (!l->open(positionMs, track->speed)) {
+			track->state.state = State::StoppedAtStart;
+			return { .errorAtStart = true };
+		}
+		const auto duration = l->duration();
+		if (duration <= 0) {
+			track->state.state = State::StoppedAtStart;
+			return { .errorAtStart = true };
+		}
+		// Length in samples is computed from the duration (divided by
+		// 1000) and the frequency.
+		track->state.frequency = l->samplesFrequency();
+		track->state.length = (duration * track->state.frequency) / 1000;
+		track->withSpeed.length = SpeedDependentPosition(
+			track->state.length,
+			track->speed);
+		return { .loader = l, .justStarted = true };
+	} else if (!EqualSpeeds(track->nextSpeed, track->speed)) {
+		// Speed change request: pass everything needed for rebuffering.
+		return {
+			.loader = l,
+			.oldSpeed = track->speed,
+			.newSpeed = track->nextSpeed,
+			.fadeStartPosition = track->withSpeed.fadeStartPosition,
+			.position = track->withSpeed.fineTunedPosition,
+			.normalLength = track->state.length,
+			.frequency = track->state.frequency,
+		};
+	} else if (track->loaded) {
+		LOG(("Audio Error: trying to load part of audio, that is already loaded to the end"));
+		return {};
+	}
+	// Regular continuation of loading with an unchanged speed.
+	return {
+		.loader = l,
+		.oldSpeed = track->speed,
+		.newSpeed = track->nextSpeed,
+		.position = track->withSpeed.fineTunedPosition,
+		.frequency = track->state.frequency,
+	};
+}
+
+// Builds the speed-dependent track data for a new playback speed.
+// Positions are rescaled by oldSpeed / newSpeed, frames before the
+// current position are dropped and the frames queued in the loader are
+// read again (with the new speed) into up to kBuffersCount buffers.
+Mixer::Track::WithSpeed Loaders::rebufferOnSpeedChange(
+		const SetupLoaderResult &setup) {
+	Expects(setup.oldSpeed > 0. && setup.newSpeed > 0.);
+	Expects(setup.loader != nullptr);
+
+	const auto newSpeed = setup.newSpeed;
+	const auto ratio = setup.oldSpeed / newSpeed;
+	const auto normalPosition = Mixer::Track::SpeedIndependentPosition(
+		setup.position,
+		setup.oldSpeed);
+	const auto scaledPosition = int64(
+		base::SafeRound(setup.position * ratio));
+	auto result = Mixer::Track::WithSpeed{
+		.fineTunedPosition = scaledPosition,
+		.position = scaledPosition,
+		.length = Mixer::Track::SpeedDependentPosition(
+			setup.normalLength,
+			newSpeed),
+		.fadeStartPosition = int64(
+			base::SafeRound(setup.fadeStartPosition * ratio)),
+	};
+
+	const auto loader = setup.loader;
+	loader->dropFramesTill(normalPosition);
+	const auto normalFrom = loader->startReadingQueuedFrames(newSpeed);
+	if (normalFrom < 0) {
+		// Nothing to read again, buffering starts from the position.
+		result.bufferedPosition = scaledPosition;
+		return result;
+	}
+	result.bufferedPosition = Mixer::Track::SpeedDependentPosition(
+		normalFrom,
+		newSpeed);
+
+	using ReadError = AudioPlayerLoader::ReadError;
+	for (auto index = 0; index != Mixer::Track::kBuffersCount; ++index) {
+		auto exhausted = false;
+		auto chunk = QByteArray();
+		auto chunkSamples = int64();
+		const auto sampleSize = loader->sampleSize();
+		const auto accumulateTill = loader->bytesPerBuffer();
+		while (chunk.size() < accumulateTill) {
+			const auto read = loader->readMore();
+			const auto sampleBytes = v::is<bytes::const_span>(read)
+				? v::get<bytes::const_span>(read)
+				: bytes::const_span();
+			if (sampleBytes.empty()) {
+				if (read == ReadError::Retry) {
+					continue;
+				}
+				// Only these two outcomes may end the queued frames.
+				Assert(read == ReadError::RetryNotQueued
+					|| read == ReadError::EndOfFile);
+				exhausted = true;
+				break;
+			}
+			chunk.append(
+				reinterpret_cast<const char*>(sampleBytes.data()),
+				sampleBytes.size());
+			chunkSamples += sampleBytes.size() / sampleSize;
+		}
+		if (!chunk.isEmpty()) {
+			result.samples[index] = chunkSamples;
+			result.bufferedLength += chunkSamples;
+			result.buffered[index] = chunk;
+		}
+		if (exhausted) {
+			break;
+		}
+	}
+
+	// Keep the position inside the buffered part and the length not
+	// smaller than the end of the buffered part.
+	const auto bufferedTill = result.bufferedPosition
+		+ result.bufferedLength;
+	if (scaledPosition > bufferedTill) {
+		result.fineTunedPosition = bufferedTill;
+		result.position = bufferedTill;
+	}
+	if (bufferedTill > result.length) {
+		result.length = bufferedTill;
+	}
+
+	return result;
+}
+
+// Returns the track of the given type if the current loader of that type
+// still matches it: the loader exists, the stored id equals the track id,
+// the track is marked as loading and the loader was created for the
+// track source. Returns nullptr otherwise.
+//
+// The track pointer is validated before it is dereferenced.
+Mixer::Track *Loaders::checkLoader(AudioMsgId::Type type) {
+	if (!mixer()) return nullptr;
+
+	const auto track = mixer()->trackForType(type);
+	if (!track) return nullptr;
+
+	auto isGoodId = false;
+	AudioPlayerLoader *l = nullptr;
+	switch (type) {
+	case AudioMsgId::Type::Voice: l = _audioLoader.get(); isGoodId = (track->state.id == _audio); break;
+	case AudioMsgId::Type::Song: l = _songLoader.get(); isGoodId = (track->state.id == _song); break;
+	case AudioMsgId::Type::Video: l = _videoLoader.get(); isGoodId = (track->state.id == _video); break;
+	}
+	if (!l) return nullptr;
+
+	if (!isGoodId || !track->loading || !l->check(track->file, track->data)) {
+		LOG(("Audio Error: playing changed while loading"));
+		return nullptr;
+	}
+
+	return track;
+}
+
+// Cancels loading of the given audio: drops its loader if it is the
+// current one for the type, then resets the loading flag on every track
+// of that type which plays this audio.
+void Loaders::onCancel(const AudioMsgId &audio) {
+	Expects(audio.type() != AudioMsgId::Type::Unknown);
+
+	const auto type = audio.type();
+	const auto current = [&]() -> const AudioMsgId* {
+		switch (type) {
+		case AudioMsgId::Type::Voice: return &_audio;
+		case AudioMsgId::Type::Song: return &_song;
+		case AudioMsgId::Type::Video: return &_video;
+		}
+		return nullptr;
+	}();
+	if (current && (*current == audio)) {
+		clear(type);
+	}
+
+	QMutexLocker lock(internal::audioPlayerMutex());
+	if (!mixer()) {
+		return;
+	}
+	for (auto index = 0; index < kTogetherLimit; ++index) {
+		const auto track = mixer()->trackForType(type, index);
+		if (track->state.id == audio) {
+			track->loading = false;
+		}
+	}
+}
+
+// Defaulted here, in the source file, rather than in the header.
+Loaders::~Loaders() = default;
+
+} // namespace Player
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_loaders.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_loaders.h
@@ -0,0 +1,81 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "media/audio/media_audio.h"
+#include "media/audio/media_child_ffmpeg_loader.h"
+
+class AudioPlayerLoader;
+class ChildFFMpegLoader;
+
+namespace Media {
+namespace Player {
+
+// Loads audio data for the player. The object is moved to the thread
+// passed to the constructor; it owns one loader per audio type (voice,
+// song, video) and accepts packets provided from outside through
+// feedFromExternal().
+class Loaders : public QObject {
+	Q_OBJECT
+
+public:
+	Loaders(QThread *thread);
+	// Both methods store the request under _fromExternalMutex and wake
+	// the object up through _fromExternalNotify.
+	void feedFromExternal(ExternalSoundPart &&part);
+	void forceToBufferExternal(const AudioMsgId &audioId);
+	~Loaders();
+
+Q_SIGNALS:
+	// Emitted when loading of the given audio has failed.
+	void error(const AudioMsgId &audio);
+	// Emitted by loadData() after it has started playing a source.
+	void needToCheck();
+
+public Q_SLOTS:
+	void onInit();
+
+	void onStart(const AudioMsgId &audio, qint64 positionMs);
+	void onLoad(const AudioMsgId &audio);
+	void onCancel(const AudioMsgId &audio);
+
+private:
+	// What setupLoader() has found out for loadData().
+	struct SetupLoaderResult {
+		AudioPlayerLoader *loader = nullptr;
+		float64 oldSpeed = 0.;
+		float64 newSpeed = 0.;
+		int64 fadeStartPosition = 0;
+		int64 position = 0;
+		int64 normalLength = 0;
+		int frequency = 0;
+		bool errorAtStart = false;
+		bool justStarted = false;
+	};
+
+	// Invoked through _fromExternalNotify to process the pending
+	// external packets and "force to buffer" requests.
+	void videoSoundAdded();
+	[[nodiscard]] Mixer::Track::WithSpeed rebufferOnSpeedChange(
+		const SetupLoaderResult &setup);
+
+	// Clears the loader of the type and emits error() for its id.
+	void emitError(AudioMsgId::Type type);
+	// Destroys the loader of the type, returns the id it had.
+	AudioMsgId clear(AudioMsgId::Type type);
+	void setStoppedState(Mixer::Track *m, State state = State::Stopped);
+
+	void loadData(AudioMsgId audio, crl::time positionMs = 0);
+	[[nodiscard]] SetupLoaderResult setupLoader(
+		const AudioMsgId &audio,
+		crl::time positionMs);
+	// Returns the track if the loader of the type still matches it.
+	Mixer::Track *checkLoader(AudioMsgId::Type type);
+
+	// Current ids and loaders, one per audio type.
+	AudioMsgId _audio, _song, _video;
+	std::unique_ptr<AudioPlayerLoader> _audioLoader;
+	std::unique_ptr<AudioPlayerLoader> _songLoader;
+	std::unique_ptr<AudioPlayerLoader> _videoLoader;
+
+	// Pending external data, guarded by _fromExternalMutex.
+	QMutex _fromExternalMutex;
+	base::flat_map<
+		AudioMsgId,
+		std::deque<FFmpeg::Packet>> _fromExternalQueues;
+	base::flat_set<AudioMsgId> _fromExternalForceToBuffer;
+	SingleQueuedInvokation _fromExternalNotify;
+
+};
+
+} // namespace Player
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_local_cache.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_local_cache.cpp
@@ -0,0 +1,339 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_local_cache.h"
+
+#include "ffmpeg/ffmpeg_bytes_io_wrap.h"
+#include "ffmpeg/ffmpeg_utility.h"
+
+namespace Media::Audio {
+namespace {
+
+// Upper bound for the length of a converted sound: 3 seconds, expressed
+// in milliseconds (ConvertAndCut turns it into samples via rate / 1000).
+constexpr auto kMaxDuration = 3 * crl::time(1000);
+// NOTE(review): not referenced anywhere in this file as shown.
+constexpr auto kMaxStreams = 2;
+// Capacity, in samples, of the output frame used while resampling.
+constexpr auto kFrameSize = 4096;
+
+// Decodes the best audio stream found in `bytes`, resamples it to signed
+// 16-bit PCM at 44.1 kHz and returns the result muxed as a WAV file.
+//
+// Mono and stereo inputs keep their channel layout, every other layout is
+// converted to stereo. Writing stops once kMaxDuration worth of samples
+// was produced: the converted chunk that would cross that limit is
+// dropped as a whole, it is not trimmed.
+//
+// Returns an empty array for empty input and on any FFmpeg failure
+// (including a failed packet allocation).
+//
+// NOTE(review): at the end of input the decoder is not switched to
+// draining mode (no null packet is sent) and the resampler is not
+// flushed, so samples still buffered there, if any, are not written.
+// Confirm whether that matters for the sounds converted here.
+[[nodiscard]] QByteArray ConvertAndCut(const QByteArray &bytes) {
+	using namespace FFmpeg;
+
+	if (bytes.isEmpty()) {
+		return {};
+	}
+
+	// Input: read directly from the in-memory byte array.
+	auto wrap = ReadBytesWrap{
+		.size = bytes.size(),
+		.data = reinterpret_cast<const uchar*>(bytes.constData()),
+	};
+
+	auto input = MakeFormatPointer(
+		&wrap,
+		&ReadBytesWrap::Read,
+		nullptr,
+		&ReadBytesWrap::Seek);
+	if (!input) {
+		return {};
+	}
+
+	auto error = AvErrorWrap(avformat_find_stream_info(input.get(), 0));
+	if (error) {
+		LogError(u"avformat_find_stream_info"_q, error);
+		return {};
+	}
+
+	auto inCodec = (const AVCodec*)nullptr;
+	const auto streamId = av_find_best_stream(
+		input.get(),
+		AVMEDIA_TYPE_AUDIO,
+		-1,
+		-1,
+		&inCodec,
+		0);
+	if (streamId < 0) {
+		LogError(u"av_find_best_stream"_q, AvErrorWrap(streamId));
+		return {};
+	}
+
+	// Decoder for the chosen stream.
+	auto inStream = input->streams[streamId];
+	auto inCodecPar = inStream->codecpar;
+	auto inCodecContext = CodecPointer(avcodec_alloc_context3(nullptr));
+	if (!inCodecContext) {
+		return {};
+	}
+
+	if (avcodec_parameters_to_context(inCodecContext.get(), inCodecPar) < 0) {
+		return {};
+	}
+
+	if (avcodec_open2(inCodecContext.get(), inCodec, nullptr) < 0) {
+		return {};
+	}
+
+	// Output: a WAV muxer writing into an in-memory buffer.
+	auto result = WriteBytesWrap();
+	auto outFormat = MakeWriteFormatPointer(
+		static_cast<void*>(&result),
+		nullptr,
+		&WriteBytesWrap::Write,
+		&WriteBytesWrap::Seek,
+		"wav"_q);
+	if (!outFormat) {
+		return {};
+	}
+
+	// Find and open output codec
+	auto outCodec = avcodec_find_encoder(AV_CODEC_ID_PCM_S16LE);
+	if (!outCodec) {
+		return {};
+	}
+
+	auto outStream = avformat_new_stream(outFormat.get(), outCodec);
+	if (!outStream) {
+		return {};
+	}
+
+	auto outCodecContext = CodecPointer(
+		avcodec_alloc_context3(outCodec));
+	if (!outCodecContext) {
+		return {};
+	}
+
+	// Keep mono / stereo as is, force everything else to stereo.
+	auto mono = AVChannelLayout(AV_CHANNEL_LAYOUT_MONO);
+	auto stereo = AVChannelLayout(AV_CHANNEL_LAYOUT_STEREO);
+	const auto in = &inCodecContext->ch_layout;
+	if (!av_channel_layout_compare(in, &mono)
+		|| !av_channel_layout_compare(in, &stereo)) {
+		av_channel_layout_copy(&outCodecContext->ch_layout, in);
+	} else {
+		outCodecContext->ch_layout = AV_CHANNEL_LAYOUT_STEREO;
+	}
+	const auto rate = 44'100;
+	outCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
+	outCodecContext->time_base = AVRational{ 1, rate };
+	outCodecContext->sample_rate = rate;
+
+	error = avcodec_open2(outCodecContext.get(), outCodec, nullptr);
+	if (error) {
+		LogError("avcodec_open2", error);
+		return {};
+	}
+
+	error = avcodec_parameters_from_context(
+		outStream->codecpar,
+		outCodecContext.get());
+	if (error) {
+		LogError("avcodec_parameters_from_context", error);
+		return {};
+	}
+
+	error = avformat_write_header(outFormat.get(), nullptr);
+	if (error) {
+		LogError("avformat_write_header", error);
+		return {};
+	}
+
+	// Resampler from the decoder format to the encoder format.
+	auto swrContext = MakeSwresamplePointer(
+		&inCodecContext->ch_layout,
+		inCodecContext->sample_fmt,
+		inCodecContext->sample_rate,
+		&outCodecContext->ch_layout,
+		outCodecContext->sample_fmt,
+		outCodecContext->sample_rate);
+	if (!swrContext) {
+		return {};
+	}
+
+	// av_packet_free() accepts a null packet, so the guard is safe even
+	// when the allocation below has failed.
+	auto packet = av_packet_alloc();
+	const auto guard = gsl::finally([&] {
+		av_packet_free(&packet);
+	});
+	if (!packet) {
+		LogError("av_packet_alloc");
+		return {};
+	}
+
+	auto frame = MakeFramePointer();
+	if (!frame) {
+		return {};
+	}
+
+	auto outFrame = MakeFramePointer();
+	if (!outFrame) {
+		return {};
+	}
+
+	// The output frame is allocated once with room for kFrameSize samples
+	// and reused for every converted chunk.
+	outFrame->nb_samples = kFrameSize;
+	outFrame->format = outCodecContext->sample_fmt;
+	av_channel_layout_copy(
+		&outFrame->ch_layout,
+		&outCodecContext->ch_layout);
+	outFrame->sample_rate = outCodecContext->sample_rate;
+
+	error = av_frame_get_buffer(outFrame.get(), 0);
+	if (error) {
+		LogError("av_frame_get_buffer", error);
+		return {};
+	}
+
+	// Both values are counted in output samples (time base is 1 / rate).
+	auto pts = int64_t(0);
+	auto maxPts = int64_t(kMaxDuration) * rate / 1000;
+
+	// Sends one frame to the encoder and writes out every packet that is
+	// ready. Always returns a non-zero code: EAGAIN after a normal frame,
+	// EOF after a flush, anything else is a real (already logged) error.
+	const auto writeFrame = [&](AVFrame *source) { // nullptr to flush
+		error = avcodec_send_frame(outCodecContext.get(), source);
+		if (error) {
+			LogError("avcodec_send_frame", error);
+			return error;
+		}
+		auto pkt = av_packet_alloc();
+		const auto pktGuard = gsl::finally([&] {
+			av_packet_free(&pkt);
+		});
+		if (!pkt) {
+			// Neither EAGAIN nor EOF, so both callers treat it as fatal.
+			error = AvErrorWrap(AVERROR(ENOMEM));
+			LogError("av_packet_alloc", error);
+			return error;
+		}
+		while (true) {
+			error = avcodec_receive_packet(outCodecContext.get(), pkt);
+			if (error) {
+				if (error.code() != AVERROR(EAGAIN)
+					&& error.code() != AVERROR_EOF) {
+					LogError("avcodec_receive_packet", error);
+				}
+				return error;
+			}
+			pkt->stream_index = outStream->index;
+			av_packet_rescale_ts(
+				pkt,
+				outCodecContext->time_base,
+				outStream->time_base);
+			error = av_interleaved_write_frame(outFormat.get(), pkt);
+			if (error) {
+				LogError("av_interleaved_write_frame", error);
+				return error;
+			}
+		}
+	};
+
+	while (pts < maxPts) {
+		error = av_read_frame(input.get(), packet);
+		const auto finished = (error.code() == AVERROR_EOF);
+		if (!finished) {
+			if (error) {
+				LogError("av_read_frame", error);
+				return {};
+			}
+			// Release the packet data on every path out of this scope.
+			auto unref = gsl::finally([&] {
+				av_packet_unref(packet);
+			});
+			if (packet->stream_index != streamId) {
+				continue;
+			}
+			error = avcodec_send_packet(inCodecContext.get(), packet);
+			if (error) {
+				LogError("avcodec_send_packet", error);
+				return {};
+			}
+		}
+
+		// Pull every frame the decoder has ready and re-encode it.
+		while (true) {
+			error = avcodec_receive_frame(inCodecContext.get(), frame.get());
+			if (error) {
+				if (error.code() == AVERROR(EAGAIN)
+					|| error.code() == AVERROR_EOF) {
+					break;
+				} else {
+					LogError("avcodec_receive_frame", error);
+					return {};
+				}
+			}
+			// A non-negative result is the number of samples written
+			// per channel, a negative one is an error code.
+			error = swr_convert(
+				swrContext.get(),
+				outFrame->data,
+				kFrameSize,
+				(const uint8_t**)frame->data,
+				frame->nb_samples);
+			if (error) {
+				LogError("swr_convert", error);
+				return {};
+			}
+			const auto samples = error.code();
+			if (!samples) {
+				continue;
+			}
+
+			outFrame->nb_samples = samples;
+			outFrame->pts = pts;
+			pts += samples;
+			if (pts > maxPts) {
+				// This chunk crosses the limit: drop it, the outer
+				// loop condition then ends the conversion.
+				break;
+			}
+
+			error = writeFrame(outFrame.get());
+			if (error && error.code() != AVERROR(EAGAIN)) {
+				return {};
+			}
+		}
+
+		if (finished) {
+			break;
+		}
+	}
+	// Flush the encoder, then finish the container.
+	error = writeFrame(nullptr);
+	if (error && error.code() != AVERROR_EOF) {
+		return {};
+	}
+	error = av_write_trailer(outFormat.get());
+	if (error) {
+		LogError("av_write_trailer", error);
+		return {};
+	}
+	return result.content;
+}
+
+} // namespace
+
+// Returns the converted sound for `id`, converting and caching it on the
+// first request. When the conversion gives nothing and a fallback source
+// is provided, the fallback is converted and cached under id 0 instead.
+// An empty LocalSound is returned when nothing could be produced.
+LocalSound LocalCache::sound(
+		DocumentId id,
+		Fn<QByteArray()> resolveOriginalBytes,
+		Fn<QByteArray()> fallbackOriginalBytes) {
+	auto &cached = _cache[id];
+	if (cached.isEmpty()) {
+		// Not converted yet (or the previous attempt produced nothing).
+		cached = ConvertAndCut(resolveOriginalBytes());
+	}
+	if (!cached.isEmpty()) {
+		return { id, cached };
+	}
+	if (fallbackOriginalBytes) {
+		return sound(0, fallbackOriginalBytes, nullptr);
+	}
+	return LocalSound();
+}
+
+// Remembers `folder` (with a trailing slash) as the base path and makes
+// sure that directory exists.
+LocalDiskCache::LocalDiskCache(const QString &folder)
+: _base(folder + '/') {
+	const auto directory = QDir();
+	directory.mkpath(_base);
+}
+
+// Returns the file name (without folder and without the ".wav" suffix)
+// under which `sound` is stored on disk, writing the file on the first
+// request for that id. An empty sound gives an empty name.
+// The name is remembered and returned even if the file could not be
+// opened for writing.
+QString LocalDiskCache::name(const LocalSound &sound) {
+	if (!sound) {
+		return {};
+	}
+	if (const auto known = _paths.find(sound.id); known != end(_paths)) {
+		return known->second;
+	}
+
+	const auto suffix = sound.id
+		? QString::number(sound.id, 16).toUpper()
+		: u"Default"_q;
+	auto result = u"TD_%1"_q.arg(suffix);
+	const auto path = _base + u"%1.wav"_q.arg(result);
+
+	QFile file(path);
+	if (file.open(QIODevice::WriteOnly)) {
+		file.write(sound.wav);
+		file.close();
+	}
+
+	_paths.emplace(sound.id, result);
+	return result;
+}
+
+// Full path of the ".wav" file for `sound`, or an empty string when
+// name() gives no name for it.
+QString LocalDiskCache::path(const LocalSound &sound) {
+	const auto fileName = name(sound);
+	if (fileName.isEmpty()) {
+		return QString();
+	}
+	return _base + fileName + u".wav"_q;
+}
+
+} // namespace Media::Audio
--- a/Telegram/SourceFiles/media/audio/media_audio_local_cache.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_local_cache.h
@@ -0,0 +1,46 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+namespace Media::Audio {
+
+// A converted sound: the id it is stored under plus its WAV bytes.
+// Converts to true when `wav` is not empty.
+struct LocalSound {
+    DocumentId id = 0;
+    QByteArray wav;
+
+    explicit operator bool() const {
+        return !wav.isEmpty();
+    }
+};
+
+// In-memory cache of converted sounds, keyed by document id.
+class LocalCache final {
+public:
+    // Returns the cached sound for `id`, or converts the bytes given by
+    // `resolveOriginalBytes`; `fallbackOriginalBytes` may be null and is
+    // used (under id 0) when that conversion produces nothing.
+    [[nodiscard]] LocalSound sound(
+        DocumentId id,
+        Fn<QByteArray()> resolveOriginalBytes,
+        Fn<QByteArray()> fallbackOriginalBytes);
+
+private:
+    base::flat_map<DocumentId, QByteArray> _cache;
+
+};
+
+// Writes converted sounds as ".wav" files into one folder and remembers
+// the file name chosen for each document id.
+class LocalDiskCache final {
+public:
+    explicit LocalDiskCache(const QString &folder);
+
+    // name(): file name without folder and extension.
+    // path(): full path including folder and ".wav".
+    // Both return an empty string for an empty sound.
+    [[nodiscard]] QString name(const LocalSound &sound);
+    [[nodiscard]] QString path(const LocalSound &sound);
+
+private:
+    // Folder path with a trailing '/'.
+    const QString _base;
+	base::flat_map<DocumentId, QString> _paths;
+
+};
+
+} // namespace Media::Audio
--- a/Telegram/SourceFiles/media/audio/media_audio_track.cpp
+++ b/Telegram/SourceFiles/media/audio/media_audio_track.cpp
@@ -0,0 +1,372 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_audio_track.h"
+
+#include "media/audio/media_audio_ffmpeg_loader.h"
+#include "media/audio/media_audio.h"
+#include "core/application.h"
+#include "core/core_settings.h"
+#include "core/file_location.h"
+
+#include <al.h>
+#include <alc.h>
+
+namespace Media {
+namespace Audio {
+namespace {
+
+// Largest file (in bytes) that Track::fillFromFile() agrees to read.
+constexpr auto kMaxFileSize = 10 * 1024 * 1024;
+constexpr auto kDetachDeviceTimeout = crl::time(500); // destroy the audio device after 500ms of silence
+// Period of the timer that polls the state of the tracks.
+constexpr auto kTrackUpdateTimeout = crl::time(100);
+
+// Generates one OpenAL source with pitch and gain set to 1 and with
+// position and velocity set to zero.
+ALuint CreateSource() {
+	ALuint result = 0;
+	alGenSources(1, &result);
+
+	alSourcef(result, AL_PITCH, 1.f);
+	alSourcef(result, AL_GAIN, 1.f);
+
+	alSource3f(result, AL_POSITION, 0, 0, 0);
+	alSource3f(result, AL_VELOCITY, 0, 0, 0);
+
+	return result;
+}
+
+// Generates one OpenAL buffer and returns its name.
+ALuint CreateBuffer() {
+	ALuint result = 0;
+	alGenBuffers(1, &result);
+	return result;
+}
+
+} // namespace
+
+// Registers the new track in the owning instance.
+Track::Track(not_null<Instance*> instance) : _instance(instance) {
+	_instance->registerTrack(this);
+}
+
+// Sets the duration of one peak window, in milliseconds. The value is
+// read by fillFromData(), so it has to be set before the data is loaded.
+void Track::samplePeakEach(crl::time peakDuration) {
+	_peakDurationMs = peakDuration;
+}
+
+// Decodes the whole of `data` with FFMpegLoader into _samples and, when a
+// peak window was set by samplePeakEach(), collects one peak value per
+// window. Sets _failed when the loader cannot be opened or reports an
+// error while reading.
+void Track::fillFromData(bytes::vector &&data) {
+	using Error = AudioPlayerLoader::ReadError;
+
+	FFMpegLoader loader(Core::FileLocation(), QByteArray(), std::move(data));
+
+	auto startPosition = qint64(0);
+	if (!loader.open(startPosition)) {
+		_failed = true;
+		return;
+	}
+
+	const auto format = loader.format();
+	const auto stereo = (format == AL_FORMAT_STEREO8)
+		|| (format == AL_FORMAT_STEREO16);
+
+	// Peak window length in sample frames, zero when peaks are disabled.
+	_peakEachPosition = _peakDurationMs ? ((loader.samplesFrequency() * _peakDurationMs) / 1000) : 0;
+	const auto samplesCount = (loader.duration() * loader.samplesFrequency()) / 1000;
+	const auto peaksCount = _peakEachPosition ? (samplesCount / _peakEachPosition) : 0;
+	_peaks.reserve(peaksCount);
+
+	// The callback is invoked per single-channel sample, so a stereo
+	// window holds twice as many of them.
+	const auto windowSamples = stereo ? (_peakEachPosition * 2) : _peakEachPosition;
+	auto windowPeak = uint16(0);
+	auto windowFilled = 0;
+	_peakValueMin = 0x7FFF;
+	_peakValueMax = 0;
+	auto collectPeak = [this, &windowPeak, &windowFilled, windowSamples](uint16 sample) {
+		accumulate_max(windowPeak, sample);
+		if (++windowFilled >= windowSamples) {
+			windowFilled -= windowSamples;
+			_peaks.push_back(windowPeak);
+			accumulate_max(_peakValueMax, windowPeak);
+			accumulate_min(_peakValueMin, windowPeak);
+			windowPeak = 0;
+		}
+	};
+
+	while (true) {
+		const auto result = loader.readMore();
+		Assert(result != Error::Wait && result != Error::RetryNotQueued);
+
+		if (result == Error::Retry) {
+			continue;
+		}
+		if (result == Error::EndOfFile) {
+			break;
+		}
+		if (result == Error::Other || result == Error::Wait) {
+			_failed = true;
+			break;
+		}
+
+		// Anything that is not an error carries a chunk of sample bytes.
+		Assert(v::is<bytes::const_span>(result));
+		const auto chunk = v::get<bytes::const_span>(result);
+		Assert(!chunk.empty());
+		_samplesCount += chunk.size() / loader.sampleSize();
+		_samples.insert(_samples.end(), chunk.data(), chunk.data() + chunk.size());
+		if (!peaksCount) {
+			continue;
+		}
+		if (format == AL_FORMAT_MONO8 || format == AL_FORMAT_STEREO8) {
+			Media::Audio::IterateSamples<uchar>(chunk, collectPeak);
+		} else if (format == AL_FORMAT_MONO16 || format == AL_FORMAT_STEREO16) {
+			Media::Audio::IterateSamples<int16>(chunk, collectPeak);
+		}
+	}
+
+	_alFormat = loader.format();
+	_sampleRate = loader.samplesFrequency();
+	_lengthMs = loader.duration();
+}
+
+// Loads the track from a file location, enabling access to it for the
+// duration of the read. Marks the track as failed when access cannot be
+// enabled.
+void Track::fillFromFile(const Core::FileLocation &location) {
+	if (!location.accessEnable()) {
+		LOG(("Track Error: Could not enable access to file '%1'.").arg(location.name()));
+		_failed = true;
+		return;
+	}
+	fillFromFile(location.name());
+	location.accessDisable();
+}
+
+// Reads the whole file at `filePath` into memory and passes the bytes to
+// fillFromData(). The track is marked as failed when the file cannot be
+// opened, is empty, is larger than kMaxFileSize or cannot be read fully.
+void Track::fillFromFile(const QString &filePath) {
+	QFile file(filePath);
+	if (!file.open(QIODevice::ReadOnly)) {
+		LOG(("Track Error: Could not open file '%1'.").arg(filePath));
+		_failed = true;
+		return;
+	}
+
+	const auto size = file.size();
+	if (size <= 0 || size > kMaxFileSize) {
+		LOG(("Track Error: Bad file '%1' size: %2.").arg(filePath).arg(size));
+		_failed = true;
+		return;
+	}
+
+	auto buffer = bytes::vector(size);
+	if (file.read(reinterpret_cast<char*>(buffer.data()), buffer.size()) != buffer.size()) {
+		LOG(("Track Error: Could not read %1 bytes from file '%2'.").arg(buffer.size()).arg(filePath));
+		_failed = true;
+		return;
+	}
+	fillFromData(std::move(buffer));
+}
+
+// (Re)starts playback from the beginning, looping or not. A positive
+// `volumeOverride` is used as the gain directly, otherwise the gain is
+// the notifications volume setting divided by 100. A failed or empty
+// track finishes immediately.
+//
+// NOTE(review): ensureSourceCreated() may fail to attach to the device
+// and leave _alSource unset; the OpenAL calls below still run in that
+// case and the track stays active. Confirm this is intended.
+void Track::playWithLooping(bool looping, float64 volumeOverride) {
+	_active = true;
+	if (failed() || _samples.empty()) {
+		finish();
+		return;
+	}
+
+	ensureSourceCreated();
+	alSourceStop(_alSource);
+
+	_looping = looping;
+	alSourcei(_alSource, AL_LOOPING, _looping ? 1 : 0);
+
+	const auto gain = (volumeOverride > 0)
+		? volumeOverride
+		: float64(Core::App().settings().notificationsVolume()) / 100.;
+	alSourcef(_alSource, AL_GAIN, gain);
+
+	alSourcePlay(_alSource);
+	_instance->trackStarted(this);
+}
+
+// Marks the track as inactive (notifying the instance only if it was
+// active before) and resets the remembered playback position.
+void Track::finish() {
+	const auto wasActive = _active;
+	_active = false;
+	if (wasActive) {
+		_instance->trackFinished(this);
+	}
+	_alPosition = 0;
+}
+
+// Creates the OpenAL source and buffer for this track (uploading the
+// decoded samples) unless a valid source exists already. Marks the track
+// as failed when the audio device cannot be attached.
+void Track::ensureSourceCreated() {
+	if (alIsSource(_alSource)) {
+		return;
+	}
+
+	// The player mutex is held only for the attach call itself.
+	const auto attached = [] {
+		QMutexLocker lock(Player::internal::audioPlayerMutex());
+		return AttachToDevice();
+	}();
+	if (!attached) {
+		_failed = true;
+		return;
+	}
+
+	_alSource = CreateSource();
+	_alBuffer = CreateBuffer();
+
+	alBufferData(_alBuffer, _alFormat, _samples.data(), _samples.size(), _sampleRate);
+	alSourcei(_alSource, AL_BUFFER, _alBuffer);
+}
+
+// Polls the OpenAL source of an active track: remembers the current
+// sample offset while it is playing, finishes the track otherwise.
+void Track::updateState() {
+	if (!(isActive() && alIsSource(_alSource))) {
+		return;
+	}
+
+	_stateUpdatedAt = crl::now();
+
+	ALint sourceState = 0;
+	alGetSourcei(_alSource, AL_SOURCE_STATE, &sourceState);
+	if (sourceState == AL_PLAYING) {
+		ALint offset = 0;
+		alGetSourcei(_alSource, AL_SAMPLE_OFFSET, &offset);
+		_alPosition = offset;
+	} else {
+		finish();
+	}
+}
+
+// Returns the peak level, normalized to [0, 1] between the smallest and
+// the largest collected peak, for the moment `when`. The playback
+// position at `when` is extrapolated from the last polled position and
+// wrapped around the track length. Returns 0 when the track is not
+// active, has no samples or peaks, or all the peaks are equal.
+float64 Track::getPeakValue(crl::time when) const {
+	if (!isActive() || !_samplesCount || _peaks.empty() || _peakValueMin == _peakValueMax) {
+		return 0.;
+	}
+	auto sampleIndex = (_alPosition + ((when - _stateUpdatedAt) * _sampleRate / 1000));
+	// Wrap into [0, _samplesCount) in constant time: '%' keeps the sign
+	// of the dividend, so a negative remainder is shifted up once.
+	sampleIndex %= _samplesCount;
+	if (sampleIndex < 0) {
+		sampleIndex += _samplesCount;
+	}
+	// _peaks is only filled when _peakEachPosition is non-zero, so the
+	// division is safe after the _peaks.empty() check above.
+	const auto peakIndex = (sampleIndex / _peakEachPosition) % _peaks.size();
+	return (_peaks[peakIndex] - _peakValueMin) / float64(_peakValueMax - _peakValueMin);
+}
+
+// Stops playback and destroys the OpenAL source and buffer, if any.
+// The state is polled first so the last playback position is remembered.
+void Track::detachFromDevice() {
+	const auto hasSource = alIsSource(_alSource);
+	if (hasSource) {
+		updateState();
+
+		alSourceStop(_alSource);
+		alSourcei(_alSource, AL_BUFFER, AL_NONE);
+
+		alDeleteBuffers(1, &_alBuffer);
+		alDeleteSources(1, &_alSource);
+	}
+	_alBuffer = 0;
+	_alSource = 0;
+}
+
+// Recreates the OpenAL source for an active track that has none and
+// resumes playback from the remembered sample offset.
+void Track::reattachToDevice() {
+	const auto needed = isActive() && !alIsSource(_alSource);
+	if (!needed) {
+		return;
+	}
+	ensureSourceCreated();
+
+	const auto offset = static_cast<ALint>(_alPosition);
+	alSourcei(_alSource, AL_LOOPING, _looping ? 1 : 0);
+	alSourcei(_alSource, AL_SAMPLE_OFFSET, offset);
+	alSourcePlay(_alSource);
+}
+
+// Releases the OpenAL objects, then removes the track from the instance.
+Track::~Track() {
+	detachFromDevice();
+	_instance->unregisterTrack(this);
+}
+
+// Sets up the playback / capture device resolvers from the application
+// settings and wires three callbacks: the periodic track state poll, the
+// delayed device detach, and the reaction to a playback device change.
+Instance::Instance()
+: _playbackDeviceId(
+	&Core::App().mediaDevices(),
+	Webrtc::DeviceType::Playback,
+	Webrtc::DeviceIdOrDefault(
+		Core::App().settings().playbackDeviceIdValue()))
+, _captureDeviceId(
+	&Core::App().mediaDevices(),
+	Webrtc::DeviceType::Capture,
+	Webrtc::DeviceIdOrDefault(
+		Core::App().settings().captureDeviceIdValue())) {
+	// Poll every track; while at least one is still active the pending
+	// "detach if not used" request is cancelled.
+	_updateTimer.setCallback([this] {
+		auto activeCount = 0;
+		for (const auto track : _tracks) {
+			track->updateState();
+			if (track->isActive()) {
+				++activeCount;
+			}
+		}
+		if (activeCount > 0) {
+			Audio::StopDetachIfNotUsedSafe();
+		}
+	});
+
+	_detachFromDeviceTimer.setCallback([this] {
+		_detachFromDeviceForce = false;
+		Player::internal::DetachFromDevice(this);
+	});
+
+	_playbackDeviceId.changes(
+	) | rpl::on_next([this](Webrtc::DeviceResolvedId id) {
+		const auto detached = Player::internal::DetachIfDeviceChanged(
+			this,
+			id);
+		if (detached) {
+			_detachFromDeviceForce = false;
+		}
+	}, _lifetime);
+}
+
+// Current playback device, read through the resolver's thread-safe getter.
+Webrtc::DeviceResolvedId Instance::playbackDeviceId() const {
+	return _playbackDeviceId.threadSafeCurrent();
+}
+
+// Current capture device, read through the resolver's plain getter.
+Webrtc::DeviceResolvedId Instance::captureDeviceId() const {
+	return _captureDeviceId.current();
+}
+
+// Creates a new track bound to this instance; the caller owns it.
+std::unique_ptr<Track> Instance::createTrack() {
+	return std::make_unique<Track>(this);
+}
+
+// Every track must have been destroyed before the instance.
+Instance::~Instance() {
+	Expects(_tracks.empty());
+}
+
+// Called from the Track constructor.
+void Instance::registerTrack(Track *track) {
+	_tracks.insert(track);
+}
+
+// Called from the Track destructor.
+void Instance::unregisterTrack(Track *track) {
+	_tracks.erase(track);
+}
+
+// A track started playing: cancel the pending detach (unless forced) and
+// make sure the periodic state poll is running.
+void Instance::trackStarted(Track *track) {
+	stopDetachIfNotUsed();
+	if (_updateTimer.isActive()) {
+		return;
+	}
+	_updateTimer.callEach(kTrackUpdateTimeout);
+}
+
+// A track finished: once no track is active, stop the state poll and
+// schedule detaching from the device.
+void Instance::trackFinished(Track *track) {
+	if (hasActiveTracks()) {
+		return;
+	}
+	_updateTimer.cancel();
+	scheduleDetachIfNotUsed();
+}
+
+// Releases the OpenAL objects of every registered track.
+void Instance::detachTracks() {
+	for (auto i = _tracks.begin(); i != _tracks.end(); ++i) {
+		(*i)->detachFromDevice();
+	}
+}
+
+// Lets every registered track recreate its source, but only while the
+// audio device is attached.
+void Instance::reattachTracks() {
+	if (IsAttachedToDevice()) {
+		for (const auto track : _tracks) {
+			track->reattachToDevice();
+		}
+	}
+}
+
+// True when at least one registered track is active.
+bool Instance::hasActiveTracks() const {
+	auto i = _tracks.begin();
+	while (i != _tracks.end() && !(*i)->isActive()) {
+		++i;
+	}
+	return (i != _tracks.end());
+}
+
+// Schedules a detach that stopDetachIfNotUsed() will not cancel: the
+// force flag stays set until the detach timer fires or the playback
+// device change handler clears it.
+void Instance::scheduleDetachFromDevice() {
+	_detachFromDeviceForce = true;
+	scheduleDetachIfNotUsed();
+}
+
+// Starts the detach timer unless it is running already, so an earlier
+// request is not postponed by a later one.
+void Instance::scheduleDetachIfNotUsed() {
+	if (_detachFromDeviceTimer.isActive()) {
+		return;
+	}
+	_detachFromDeviceTimer.callOnce(kDetachDeviceTimeout);
+}
+
+// Cancels a pending detach, except when it was requested as forced.
+void Instance::stopDetachIfNotUsed() {
+	if (_detachFromDeviceForce) {
+		return;
+	}
+	_detachFromDeviceTimer.cancel();
+}
+
+// The audio instance owned by the application object.
+Instance &Current() {
+	return Core::App().audio();
+}
+
+} // namespace Audio
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_audio_track.h
+++ b/Telegram/SourceFiles/media/audio/media_audio_track.h
@@ -0,0 +1,140 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "base/timer.h"
+#include "base/bytes.h"
+#include "webrtc/webrtc_device_resolver.h"
+
+namespace Core {
+class FileLocation;
+} // namespace Core
+
+namespace Media {
+namespace Audio {
+
+class Instance;
+
+// A short sound that is decoded fully into memory and played through its
+// own OpenAL source. Each track registers itself in the Instance it was
+// created for and reports there when it starts and finishes.
+class Track {
+public:
+	Track(not_null<Instance*> instance);
+
+	// Peak window duration in milliseconds; read by fillFromData().
+	void samplePeakEach(crl::time peakDuration);
+
+	// Decode the sound; check failed() afterwards.
+	void fillFromData(bytes::vector &&data);
+	void fillFromFile(const Core::FileLocation &location);
+	void fillFromFile(const QString &filePath);
+
+	// A positive volumeOverride is used as the gain, otherwise the
+	// notifications volume from the settings is applied.
+	void playOnce(float64 volumeOverride = -1) {
+		playWithLooping(false, volumeOverride);
+	}
+	void playInLoop(float64 volumeOverride = -1) {
+		playWithLooping(true, volumeOverride);
+	}
+
+	bool isLooping() const {
+		return _looping;
+	}
+	bool isActive() const {
+		return _active;
+	}
+	bool failed() const {
+		return _failed;
+	}
+
+	int64 getLengthMs() const {
+		return _lengthMs;
+	}
+	// Normalized peak level (0..1) extrapolated for the moment `when`.
+	float64 getPeakValue(crl::time when) const;
+
+	void detachFromDevice();
+	void reattachToDevice();
+	void updateState();
+
+	~Track();
+
+private:
+	void finish();
+	void ensureSourceCreated();
+	void playWithLooping(bool looping, float64 volumeOverride);
+
+	not_null<Instance*> _instance;
+
+	bool _failed = false;
+	bool _active = false;
+	bool _looping = false;
+	// NOTE(review): not referenced in media_audio_track.cpp as shown.
+	float64 _volume = 1.;
+
+	// Decoded audio: raw sample bytes, their count and rate.
+	int64 _samplesCount = 0;
+	int32 _sampleRate = 0;
+	bytes::vector _samples;
+
+	// Peak data collected by fillFromData().
+	crl::time _peakDurationMs = 0;
+	int _peakEachPosition = 0;
+	std::vector<uint16> _peaks;
+	uint16 _peakValueMin = 0;
+	uint16 _peakValueMax = 0;
+
+	crl::time _lengthMs = 0;
+	// Time of the last updateState() poll that reached the source.
+	crl::time _stateUpdatedAt = 0;
+
+	// OpenAL format, last polled sample offset, source and buffer names.
+	int32 _alFormat = 0;
+	int64 _alPosition = 0;
+	uint32 _alSource = 0;
+	uint32 _alBuffer = 0;
+
+};
+
+// Keeps the set of live tracks, polls their state while any of them is
+// active and schedules detaching from the audio device when none is.
+// Also resolves the current playback and capture device ids.
+class Instance {
+public:
+	// Thread: Main.
+	Instance();
+
+	// Thread: Any. Must be locked: AudioMutex.
+	[[nodiscard]] Webrtc::DeviceResolvedId playbackDeviceId() const;
+
+	// Thread: Main.
+	[[nodiscard]] Webrtc::DeviceResolvedId captureDeviceId() const;
+
+	[[nodiscard]] std::unique_ptr<Track> createTrack();
+
+	void detachTracks();
+	void reattachTracks();
+	bool hasActiveTracks() const;
+
+	// scheduleDetachFromDevice() requests a forced detach that
+	// stopDetachIfNotUsed() does not cancel.
+	void scheduleDetachFromDevice();
+	void scheduleDetachIfNotUsed();
+	void stopDetachIfNotUsed();
+
+	// Expects all tracks to be destroyed already.
+	~Instance();
+
+private:
+	// Callbacks used by Track only.
+	friend class Track;
+	void registerTrack(Track *track);
+	void unregisterTrack(Track *track);
+	void trackStarted(Track *track);
+	void trackFinished(Track *track);
+
+private:
+	std::set<Track*> _tracks;
+	Webrtc::DeviceResolver _playbackDeviceId;
+	Webrtc::DeviceResolver _captureDeviceId;
+
+	// Periodic poll of track states, runs while a track is playing.
+	base::Timer _updateTimer;
+
+	// Delayed detach from the audio device.
+	base::Timer _detachFromDeviceTimer;
+	bool _detachFromDeviceForce = false;
+
+	rpl::lifetime _lifetime;
+
+};
+
+[[nodiscard]] Instance &Current();
+
+} // namespace Audio
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_child_ffmpeg_loader.cpp
+++ b/Telegram/SourceFiles/media/audio/media_child_ffmpeg_loader.cpp
@@ -0,0 +1,113 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#include "media/audio/media_child_ffmpeg_loader.h"
+
+#include "core/crash_reports.h"
+#include "core/file_location.h"
+
+namespace Media {
+namespace {
+
+using FFmpeg::AvErrorWrap;
+
+} // namespace
+
+// Takes ownership of the externally prepared sound data. The base loader
+// gets an empty file location and empty data, and the data must already
+// carry a codec context.
+ChildFFMpegLoader::ChildFFMpegLoader(
+	std::unique_ptr<ExternalSoundData> &&data)
+: AbstractAudioFFMpegLoader(
+	Core::FileLocation(),
+	QByteArray(),
+	bytes::vector())
+, _parentData(std::move(data)) {
+	Expects(_parentData->codec != nullptr);
+}
+
+// Converts the start position from milliseconds to samples, overrides
+// the duration with the value provided in the external data and
+// initializes the loader from the provided codec context.
+bool ChildFFMpegLoader::open(crl::time positionMs, float64 speed) {
+	const auto startSample = (positionMs * samplesFrequency()) / 1000LL;
+	overrideDuration(startSample, _parentData->duration);
+
+	const auto context = _parentData->codec.get();
+	return initUsingContext(context, speed);
+}
+
+// Consumes the frame that came together with the codec context, if it is
+// still there; reports Wait once it has been taken.
+auto ChildFFMpegLoader::readFromInitialFrame() -> ReadResult {
+	if (_parentData->frame) {
+		return replaceFrameAndRead(base::take(_parentData->frame));
+	}
+	return ReadError::Wait;
+}
+
+// Produces the next portion of decoded data. Sources are tried in this
+// order: the initial frame, frames the decoder has ready, then the next
+// queued packet (which is only sent to the decoder, the caller is asked
+// to retry). An empty packet marks the end of the stream.
+auto ChildFFMpegLoader::readMore() -> ReadResult {
+	if (_readTillEnd) {
+		return ReadError::EndOfFile;
+	}
+
+	if (const auto fromInitial = readFromInitialFrame();
+		fromInitial != ReadError::Wait) {
+		return fromInitial;
+	}
+
+	const auto context = _parentData->codec.get();
+	if (const auto fromContext = readFromReadyContext(context);
+		fromContext != ReadError::Wait) {
+		return fromContext;
+	}
+
+	if (_queue.empty()) {
+		if (_eofReached) {
+			// The decoder was drained and has nothing more to give.
+			_readTillEnd = true;
+			return ReadError::EndOfFile;
+		}
+		return ReadError::Wait;
+	}
+
+	auto next = std::move(_queue.front());
+	_queue.pop_front();
+
+	_eofReached = next.empty();
+	if (_eofReached) {
+		avcodec_send_packet(context, nullptr); // drain
+		return ReadError::Retry;
+	}
+
+	AvErrorWrap error = avcodec_send_packet(context, &next.fields());
+	if (error) {
+		LogError(u"avcodec_send_packet"_q, error);
+		// There is a sample voice message where skipping such packet
+		// results in a crash (read_access to nullptr) in swr_convert().
+		// NOTE(review): the bad packet is skipped here nevertheless -
+		// confirm that the crash above cannot be reached on this path.
+		const auto skip = (error.code() == AVERROR_INVALIDDATA);
+		return skip
+			? ReadError::Retry // try to skip bad packet
+			: ReadError::Other;
+	}
+	return ReadError::Retry;
+}
+
+// Moves all the given packets to the end of the queue and leaves
+// `packets` empty.
+void ChildFFMpegLoader::enqueuePackets(
+		std::deque<FFmpeg::Packet> &&packets) {
+	if (_queue.empty()) {
+		// Nothing queued: take the whole container at once.
+		_queue = std::move(packets);
+	} else {
+		for (auto &packet : packets) {
+			_queue.push_back(std::move(packet));
+		}
+	}
+	packets.clear();
+}
+
+// Stores the flag; it is only reported back by forceToBuffer().
+void ChildFFMpegLoader::setForceToBuffer(bool force) {
+	_forceToBuffer = force;
+}
+
+// Returns the flag last set by setForceToBuffer().
+bool ChildFFMpegLoader::forceToBuffer() const {
+	return _forceToBuffer;
+}
+
+// Defined out of line, in the implementation file.
+ChildFFMpegLoader::~ChildFFMpegLoader() = default;
+
+} // namespace Media
--- a/Telegram/SourceFiles/media/audio/media_child_ffmpeg_loader.h
+++ b/Telegram/SourceFiles/media/audio/media_child_ffmpeg_loader.h
@@ -0,0 +1,62 @@
+/*
+This file is part of Telegram Desktop,
+the official desktop application for the Telegram messaging service.
+
+For license and copyright information please follow this link:
+https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+*/
+#pragma once
+
+#include "media/audio/media_audio_ffmpeg_loader.h"
+#include "media/streaming/media_streaming_utility.h"
+
+namespace Media {
+
+// Everything ChildFFMpegLoader needs to start: an opened codec context,
+// an optional already decoded first frame, and the duration that is
+// passed to overrideDuration() in ChildFFMpegLoader::open().
+struct ExternalSoundData {
+	FFmpeg::CodecPointer codec;
+	FFmpeg::FramePointer frame;
+	crl::time duration = 0;
+	float64 speed = 1.; // 0.5 <= speed <= 2.
+};
+
+// A batch of packets for the sound identified by `audio`.
+// `packets` is a non-owning view: the storage belongs to the sender.
+struct ExternalSoundPart {
+	AudioMsgId audio;
+	gsl::span<FFmpeg::Packet> packets;
+};
+
+// Audio loader that does not read a file itself: it decodes packets that
+// are pushed to it through enqueuePackets(), using a codec context that
+// was prepared elsewhere and handed over in ExternalSoundData.
+class ChildFFMpegLoader : public AbstractAudioFFMpegLoader {
+public:
+	ChildFFMpegLoader(std::unique_ptr<ExternalSoundData> &&data);
+
+	bool open(crl::time positionMs, float64 speed = 1.) override;
+
+	// Always matches: this loader is not bound to a file or data blob.
+	bool check(const Core::FileLocation &file, const QByteArray &data) override {
+		return true;
+	}
+
+	ReadResult readMore() override;
+	void enqueuePackets(std::deque<FFmpeg::Packet> &&packets) override;
+	void setForceToBuffer(bool force) override;
+	bool forceToBuffer() const override;
+
+	// True after an empty (end of stream) packet was taken from the queue.
+	bool eofReached() const {
+		return _eofReached;
+	}
+
+	~ChildFFMpegLoader();
+
+private:
+	// Streaming player reads first frame by itself and provides it together
+	// with the codec context. So we first read data from this frame and
+	// only after that we try to read next packets.
+	ReadResult readFromInitialFrame();
+
+	std::unique_ptr<ExternalSoundData> _parentData;
+	std::deque<FFmpeg::Packet> _queue;
+	bool _forceToBuffer = false;
+	bool _eofReached = false;
+	// Set once EndOfFile was reported; readMore() keeps returning it.
+	bool _readTillEnd = false;
+
+};
+
+} // namespace Media