diff --git a/dxvk.conf b/dxvk.conf
index d8ff2989a..55cc232f3 100644
--- a/dxvk.conf
+++ b/dxvk.conf
@@ -76,6 +76,36 @@
 # d3d9.maxFrameRate = 0
 
 
+# Controls latency sleep and Nvidia Reflex support.
+#
+# Supported values:
+# - Auto: By default, DXVK only supports latency sleep in D3D11 games that
+#         use Reflex if the graphics driver supports VK_NV_low_latency2,
+#         and if dxvk-nvapi is enabled in Proton.
+# - True: Enables built-in latency reduction based on internal timings.
+#         This assumes that input sampling for any given frame happens after
+#         the D3D9 or DXGI Present call returns; games that render and present
+#         asynchronously will not behave as intended.
+#         Similarly, this will not have any effect in games with built-in frame
+#         rate limiters, or if an external limiter (such as MangoHud) is used.
+#         In some games, enabling this may reduce performance or lead to less
+#         consistent frame pacing.
+#         The implementation will either use VK_NV_low_latency2 if supported
+#         by the driver, or a custom algorithm.
+# - False: Disables Reflex support as well as built-in latency reduction.
+
+# dxvk.latencySleep = Auto
+
+
+# Tolerance for the latency sleep heuristic, in microseconds. Higher values
+# increase latency, but may lead to better frame pacing in some cases. Does
+# not have any effect if VK_NV_low_latency2 is used.
+#
+# Supported values: Any non-negative number
+
+# dxvk.latencyTolerance = 1000
+
+
 # Override PCI vendor and device IDs reported to the application. Can
 # cause the app to adjust behaviour depending on the selected values.
 #
diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp
index d9707c246..c029949aa 100644
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@@ -1,5 +1,6 @@
 #include "dxvk_device.h"
 #include "dxvk_instance.h"
+#include "dxvk_latency_builtin.h"
 
 namespace dxvk {
   
@@ -305,6 +306,16 @@ namespace dxvk {
   }
 
 
+  Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
+    const Rc<Presenter>&            presenter) {
+    // Built-in tracking is opt-in: anything but an explicit True yields none.
+    if (m_options.latencySleep == Tristate::True)
+      return new DxvkBuiltInLatencyTracker(m_options.latencyTolerance);
+
+    return nullptr;
+  }
+
+
   void DxvkDevice::presentImage(
     const Rc<Presenter>&            presenter,
           uint64_t                  frameId,
diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h
index a40a25fec..5d8b4e37b 100644
--- a/src/dxvk/dxvk_device.h
+++ b/src/dxvk/dxvk_device.h
@@ -10,6 +10,7 @@
 #include "dxvk_framebuffer.h"
 #include "dxvk_image.h"
 #include "dxvk_instance.h"
+#include "dxvk_latency.h"
 #include "dxvk_memory.h"
 #include "dxvk_meta_clear.h"
 #include "dxvk_objects.h"
@@ -478,6 +479,16 @@ namespace dxvk {
     void requestCompileShader(
       const Rc<DxvkShader>&         shader);
 
+    /**
+     * \brief Creates latency tracker for a presenter
+     *
+     * The specific implementation and parameters used
+     * depend on user configuration.
+     * \param [in] presenter Presenter instance
+     */
+    Rc<DxvkLatencyTracker> createLatencyTracker(
+      const Rc<Presenter>&            presenter);
+
     /**
      * \brief Presents a swap chain image
      * 
diff --git a/src/dxvk/dxvk_latency.h b/src/dxvk/dxvk_latency.h
new file mode 100644
index 000000000..20b348365
--- /dev/null
+++ b/src/dxvk/dxvk_latency.h
@@ -0,0 +1,185 @@
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+
+#include "../util/util_likely.h"
+#include "../util/util_time.h"
+
+#include "../util/rc/util_rc_ptr.h"
+
+#include "../vulkan/vulkan_loader.h"
+
+namespace dxvk {
+
+  /**
+   * \brief Latency tracker statistics
+   */
+  struct DxvkLatencyStats {
+    std::chrono::microseconds frameLatency;   ///< Built-in tracker: time from frame start to GPU present completion
+    std::chrono::microseconds sleepDuration;  ///< Built-in tracker: latency sleep applied before the frame started
+  };
+
+
+  /**
+   * \brief Latency tracker
+   *
+   * Accumulates time stamps of certain parts of a frame.
+   */
+  class DxvkLatencyTracker {
+
+  public:
+
+    virtual ~DxvkLatencyTracker() { }
+
+    /**
+     * \brief Increments ref count
+     */
+    void incRef() {
+      m_refCount.fetch_add(1, std::memory_order_acquire);
+    }
+
+    /**
+     * \brief Decrements ref count
+     *
+     * Destroys the object when there are no users left.
+     */
+    void decRef() {
+      if (m_refCount.fetch_sub(1, std::memory_order_acq_rel) == 1u)
+        delete this; // acq_rel: other owners' writes must be visible to the destructor
+    }
+
+    /**
+     * \brief Called when presentation begins on the CPU timeline
+     *
+     * Must happen before acquiring an image from the presenter.
+     * \param [in] frameId Current frame ID
+     */
+    virtual void notifyCpuPresentBegin(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when the CS thread reaches a given frame
+     *
+     * Should be recorded into the CS thread after completing
+     * the previous frame on the application's CPU timeline.
+     * \param [in] frameId Current frame ID
+     */
+    virtual void notifyCsRenderBegin(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when the CS thread completes a frame
+     *
+     * Should be recorded into the CS thread after recording
+     * presentation commands for that frame.
+     * \param [in] frameId Current frame ID
+     */
+    virtual void notifyCsRenderEnd(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when presentation ends on the CPU timeline
+     *
+     * Must happen after acquiring an image for presentation, but
+     * before synchronizing with previous frames or performing
+     * latency sleep. The intention is to measure acquire delays.
+     * \param [in] frameId Current frame ID
+     */
+    virtual void notifyCpuPresentEnd(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when a command list is submitted to the GPU
+     *
+     * \param [in] frameId Associated frame ID
+     */
+    virtual void notifyQueueSubmit(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when a frame is queued for presentation
+     *
+     * \param [in] frameId Associated frame ID
+     */
+    virtual void notifyQueuePresentBegin(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called after a frame has been queued for presentation
+     *
+     * \param [in] frameId Associated frame ID
+     * \param [in] status Result of the present operation
+     */
+    virtual void notifyQueuePresentEnd(
+            uint64_t                  frameId,
+            VkResult                  status) = 0;
+
+    /**
+     * \brief Called when a submission begins execution on the GPU
+     *
+     * Any previous submissions will have completed by this time. This
+     * can be used to measure GPU idle time throughout a frame.
+     * \param [in] frameId Associated frame ID
+     */
+    virtual void notifyGpuExecutionBegin(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when a submission completes execution on the GPU
+     *
+     * The previous submission will have completed by the time this
+     * gets called. This may be used to measure GPU idle time.
+     * \param [in] frameId Associated frame ID
+     */
+    virtual void notifyGpuExecutionEnd(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Called when presentation of a given frame finishes on the GPU
+     *
+     * This is generally the last thing that happens within a frame.
+     * \param [in] frameId Associated frame ID
+     */
+    virtual void notifyGpuPresentEnd(
+            uint64_t                  frameId) = 0;
+
+    /**
+     * \brief Performs latency sleep and begins next frame
+     *
+     * Uses latency data from previous frames to estimate when to wake
+     * up the application thread in order to minimize input latency.
+     * \param [in] frameId Frame ID of the upcoming frame
+     * \param [in] maxFrameRate Maximum frame rate or refresh rate
+     */
+    virtual void sleepAndBeginFrame(
+            uint64_t                  frameId,
+            double                    maxFrameRate) = 0;
+
+    /**
+     * \brief Discards all current timing data
+     *
+     * Should be called to reset latency tracking in case
+     * presentation failed for any given frame.
+     */
+    virtual void discardTimings() = 0;
+
+    /**
+     * \brief Queries statistics for the given frame
+     *
+     * Returns statistics for the frame closest to \c frameId for
+     * which data is available. If no such frame exists, the stat
+     * counters will return 0.
+     * \param [in] frameId Frame to query
+     */
+    virtual DxvkLatencyStats getStatistics(
+            uint64_t                  frameId) = 0;
+
+  private:
+
+    std::atomic<uint64_t> m_refCount = { 0u };
+
+  };
+
+}
diff --git a/src/dxvk/dxvk_latency_builtin.cpp b/src/dxvk/dxvk_latency_builtin.cpp
new file mode 100644
index 000000000..8d3c0fa1a
--- /dev/null
+++ b/src/dxvk/dxvk_latency_builtin.cpp
@@ -0,0 +1,317 @@
+#include <cmath>
+
+#include "dxvk_latency_builtin.h"
+
+#include "../util/log/log.h"
+
+#include "../util/util_fps_limiter.h"
+#include "../util/util_string.h"
+
+namespace dxvk {
+
+  DxvkBuiltInLatencyTracker::DxvkBuiltInLatencyTracker(
+          int32_t                   toleranceUs)
+  : m_tolerance(std::chrono::duration_cast<duration>(
+      std::chrono::microseconds(std::max(toleranceUs, 0)))) {
+    Logger::info("Latency control enabled, using built-in algorithm");
+
+    // Pick up the FPS limiter's environment override, if there is one
+    if (auto limit = FpsLimiter::getEnvironmentOverride())
+      m_envFpsLimit = *limit;
+  }
+
+
+  DxvkBuiltInLatencyTracker::~DxvkBuiltInLatencyTracker() {
+    // No explicit cleanup is performed here
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyCpuPresentBegin(
+          uint64_t                  frameId) {
+    // Intentionally empty: the built-in tracker records nothing for this event
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyCpuPresentEnd(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    // Frames outside of the tracked range are silently ignored
+    if (auto entry = findFrame(frameId))
+      entry->cpuPresentEnd = dxvk::high_resolution_clock::now();
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyCsRenderBegin(
+          uint64_t                  frameId) {
+    // Intentionally empty: the built-in tracker records nothing for this event
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyCsRenderEnd(
+          uint64_t                  frameId) {
+    // Intentionally empty: the built-in tracker records nothing for this event
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyQueueSubmit(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+    auto entry = findFrame(frameId);
+    // Keep only the time stamp of the first submission within a frame
+    if (entry != nullptr && entry->queueSubmit == time_point())
+      entry->queueSubmit = dxvk::high_resolution_clock::now();
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyQueuePresentBegin(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    // Overwritten on every call; untracked frames are ignored
+    if (auto entry = findFrame(frameId))
+      entry->queuePresent = dxvk::high_resolution_clock::now();
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyQueuePresentEnd(
+          uint64_t                  frameId,
+          VkResult                  status) {
+    // Intentionally empty: neither the frame ID nor the present status is used here
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyGpuExecutionBegin(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    if (auto entry = findFrame(frameId)) {
+      const auto timestamp = dxvk::high_resolution_clock::now();
+      // The first submission marks the start of GPU work for the frame
+      if (entry->gpuExecStart == time_point())
+        entry->gpuExecStart = timestamp;
+
+      // Time since the frame's previous submission ended counts as idle
+      if (entry->gpuIdleStart != time_point()) {
+        entry->gpuIdleEnd = timestamp;
+        entry->gpuIdleTime += timestamp - entry->gpuIdleStart;
+      }
+    }
+
+    m_cond.notify_one();
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyGpuExecutionEnd(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+    auto entry = findFrame(frameId);
+    if (entry == nullptr)
+      return;
+
+    // The end of a submission is also where a potential idle period starts
+    const auto timestamp = dxvk::high_resolution_clock::now();
+    entry->gpuExecEnd = timestamp;
+    entry->gpuIdleStart = timestamp;
+  }
+
+
+  void DxvkBuiltInLatencyTracker::notifyGpuPresentEnd(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    if (auto entry = findFrame(frameId))
+      entry->gpuPresent = dxvk::high_resolution_clock::now();
+
+    // Always signal, so that a thread waiting in sleep() re-checks its frame
+    m_cond.notify_one();
+  }
+
+
+  void DxvkBuiltInLatencyTracker::sleepAndBeginFrame(
+          uint64_t                  frameId,
+          double                    maxFrameRate) {
+    // sleep() manages the lock on its own, so only take it afterwards
+    const duration slept = sleep(frameId, maxFrameRate);
+
+    std::unique_lock lock(m_mutex);
+    DxvkLatencyFrameData* entry = initFrame(frameId);
+    entry->frameStart = dxvk::high_resolution_clock::now();
+    entry->sleepDuration = slept;
+  }
+
+
+  void DxvkBuiltInLatencyTracker::discardTimings() {
+    std::unique_lock lock(m_mutex);
+    m_validRangeBegin = m_validRangeEnd + 1u; // begin > end: the valid range is now empty for findFrame()
+  }
+
+
+  DxvkLatencyStats DxvkBuiltInLatencyTracker::getStatistics(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    // Walk backwards from the requested frame until one has been presented
+    for (uint64_t id = frameId; id && id >= m_validRangeBegin; id--) {
+      auto entry = findFrame(id);
+      if (!entry || entry->gpuPresent == time_point())
+        continue;
+
+      DxvkLatencyStats result = { };
+      result.frameLatency = std::chrono::duration_cast<std::chrono::microseconds>(entry->gpuPresent - entry->frameStart);
+      result.sleepDuration = std::chrono::duration_cast<std::chrono::microseconds>(entry->sleepDuration);
+      return result;
+    }
+
+    return DxvkLatencyStats { };
+  }
+
+
+  DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::sleep(
+          uint64_t                  frameId,
+          double                    maxFrameRate) {
+    // Sleeps before frameId starts and returns the requested sleep time. First
+    // wait for timings of prior frames; this should not stall for very long if
+    // a maximum frame latency of 1 is enforced correctly by the swap chain.
+    std::unique_lock lock(m_mutex);
+
+    for (uint32_t i = 2; i <= FrameCount; i++) {
+      auto f = findFrame(frameId - i);
+      // Without a complete history of prior frames, do not sleep at all
+      if (!f || f->cpuPresentEnd == time_point())
+        return duration(0u);
+      // NOTE(review): m_mutex is released while waiting - confirm discardTimings() cannot run concurrently
+      m_cond.wait(lock, [f] {
+        return f->gpuPresent != time_point();
+      });
+    }
+
+    // Frame entry of the last frame that fully completed
+    auto prev = findFrame(frameId - 2u);
+
+    // The way we want to align subsequent frames depends on whether
+    // we are limited by GPU performance or display refresh.
+    //
+    // In either case, we estimate the amount of CPU time the game requires
+    // before any GPU work can start to be the delay between frame start and
+    // first submission, plus any GPU idle time during the frame. This is not
+    // accurate if there are forced GPU sync points, but we can't work around
+    // that in a meaningful way.
+    constexpr size_t EntryCount = FrameCount - 1u;
+
+    std::array<duration, EntryCount> cpuTimes = { };
+    std::array<duration, EntryCount> gpuTimes = { };
+
+    for (uint32_t i = 0; i < EntryCount; i++) {
+      auto f = findFrame(frameId - (i + 2u));
+      // Not null-checked: relies on the wait loop above having validated these frames
+      cpuTimes[i] = (f->queueSubmit - f->frameStart) + f->gpuIdleTime;
+      gpuTimes[i] = (f->gpuExecEnd - f->gpuExecStart) - f->gpuIdleTime;
+    }
+
+    duration nextCpuTime = estimateTime(cpuTimes.data(), cpuTimes.size());
+    duration nextGpuTime = estimateTime(gpuTimes.data(), gpuTimes.size());
+
+    // Compute the initial deadline based on GPU execution times
+    time_point gpuDeadline = prev->gpuExecEnd + 2u * nextGpuTime;
+
+    // If we're rendering faster than refresh, use present_wait timings from
+    // previous frames as a starting point and compute an average in order to
+    // account for potentially erratic present_wait delays.
+    duration frameInterval = computeFrameInterval(maxFrameRate);
+
+    if (frameInterval.count()) {
+      duration nextPresentFromPrev = duration(0u);
+
+      for (uint32_t i = 2; i <= FrameCount; i++) {
+        auto f = findFrame(frameId - i);
+        // Extrapolate this frame's present time forward by i frame intervals
+        time_point deadline = f->gpuPresent + i * frameInterval - m_tolerance;
+        nextPresentFromPrev += deadline - prev->gpuPresent;
+      }
+
+      time_point wsiDeadline = prev->gpuPresent + (nextPresentFromPrev / int32_t(FrameCount - 1u));
+      gpuDeadline = std::max(gpuDeadline, wsiDeadline);
+    }
+
+    // Line up the next frame in such a way that the first GPU submission
+    // happens just before the current frame's final submission completes
+    time_point gpuStartTime = gpuDeadline - nextGpuTime;
+    time_point cpuStartTime = gpuStartTime - nextCpuTime - m_tolerance;
+
+    time_point now = dxvk::high_resolution_clock::now();
+
+    // Release lock before actually sleeping, or
+    // it will affect the time measurements.
+    lock.unlock();
+
+    Sleep::sleepUntil(now, cpuStartTime);
+    return std::max(duration(0u), cpuStartTime - now);
+  }
+
+
+  DxvkLatencyFrameData* DxvkBuiltInLatencyTracker::initFrame(
+          uint64_t                  frameId) {
+    // A gap in the frame ID sequence drops everything tracked so far
+    if (frameId != m_validRangeEnd + 1u)
+      m_validRangeBegin = frameId;
+    // Cap the range at FrameCount entries so that no two frames share a slot
+    if (frameId >= m_validRangeBegin + FrameCount)
+      m_validRangeBegin = frameId + 1u - FrameCount;
+
+    m_validRangeEnd = frameId;
+    auto* slot = &m_frames[frameId % FrameCount];
+    *slot = DxvkLatencyFrameData();
+    slot->frameId = frameId;
+    return slot;
+  }
+
+
+  DxvkLatencyFrameData* DxvkBuiltInLatencyTracker::findFrame(
+          uint64_t                  frameId) {
+    if (frameId < m_validRangeBegin || frameId > m_validRangeEnd)
+      return nullptr;
+    return &m_frames[frameId % FrameCount];
+  }
+
+
+  DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::computeFrameInterval(
+          double                    maxFrameRate) {
+    // A positive environment limit replaces the caller-provided frame rate
+    const double rate = m_envFpsLimit > 0.0 ? m_envFpsLimit : maxFrameRate;
+
+    return computeIntervalFromRate(rate);
+  }
+
+
+  DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::computeIntervalFromRate(
+          double                    frameRate) {
+    if (!std::isnormal(frameRate) || frameRate <= 0.0)
+      return duration(0u);
+
+    const auto nanos = std::chrono::nanoseconds(uint64_t(1'000'000'000.0 / frameRate));
+    return std::chrono::duration_cast<duration>(nanos);
+  }
+
+
+  DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::estimateTime(
+    const duration*                 frames,
+          size_t                    frameCount) {
+    // Slide a window of three over the samples and return the largest window
+    // median. Yields zero if fewer than three samples are given.
+    duration result = duration(0u);
+
+    for (size_t i = 0u; i + 2u < frameCount; i++) { // no unsigned wrap when frameCount < 2
+      duration a = frames[i];
+      duration b = frames[i + 1];
+      duration c = frames[i + 2];
+
+      duration min = std::min(std::min(a, b), c);
+      duration max = std::max(std::max(a, b), c);
+
+      result = std::max(result, a + b + c - min - max);
+    }
+
+    return result;
+  }
+}
diff --git a/src/dxvk/dxvk_latency_builtin.h b/src/dxvk/dxvk_latency_builtin.h
new file mode 100644
index 000000000..dcbc4be31
--- /dev/null
+++ b/src/dxvk/dxvk_latency_builtin.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <array>
+
+#include "dxvk_latency.h"
+
+#include "../util/thread.h"
+
+#include "../util/util_sleep.h"
+#include "../util/util_time.h"
+
+#include "../util/config/config.h"
+
+#include "../util/sync/sync_spinlock.h"
+
+namespace dxvk {
+
+  /**
+   * \brief Timings for a single tracked frame
+   */
+  struct DxvkLatencyFrameData {
+    using time_point = dxvk::high_resolution_clock::time_point;
+    using duration = dxvk::high_resolution_clock::duration;
+
+    uint64_t    frameId         = 0u;           ///< Frame ID assigned when the entry is initialized
+    time_point  frameStart      = time_point(); ///< Taken right after the latency sleep for this frame
+    time_point  cpuPresentEnd   = time_point(); ///< Taken in notifyCpuPresentEnd
+    time_point  queueSubmit     = time_point(); ///< First queue submission of the frame
+    time_point  queuePresent    = time_point(); ///< Taken in notifyQueuePresentBegin
+    time_point  gpuExecStart    = time_point(); ///< First GPU execution begin of the frame
+    time_point  gpuExecEnd      = time_point(); ///< Most recent GPU execution end of the frame
+    time_point  gpuIdleStart    = time_point(); ///< Most recent GPU execution end, start of a possible idle gap
+    time_point  gpuIdleEnd      = time_point(); ///< Most recent GPU execution begin that followed an execution end
+    duration    gpuIdleTime     = duration(0u); ///< Accumulated gaps between submissions of the frame
+    time_point  gpuPresent      = time_point(); ///< Taken in notifyGpuPresentEnd; unset while the frame is in flight
+    duration    sleepDuration   = duration(0u); ///< Sleep duration reported for the start of this frame
+  };
+
+
+  /**
+   * \brief Built-in latency tracker
+   *
+   * Implements a simple latency reduction algorithm
+   * based on CPU timestamps received from the backend.
+   */
+  class DxvkBuiltInLatencyTracker : public DxvkLatencyTracker {
+    using time_point = typename DxvkLatencyFrameData::time_point;
+    using duration = typename DxvkLatencyFrameData::duration;
+    // Number of slots in the frame ring buffer
+    constexpr static size_t FrameCount = 8u;
+  public:
+
+    DxvkBuiltInLatencyTracker(
+            int32_t                   toleranceUs);
+
+    ~DxvkBuiltInLatencyTracker();
+
+    void notifyCpuPresentBegin(
+            uint64_t                  frameId) override;
+
+    void notifyCpuPresentEnd(
+            uint64_t                  frameId) override;
+
+    void notifyCsRenderBegin(
+            uint64_t                  frameId) override;
+
+    void notifyCsRenderEnd(
+            uint64_t                  frameId) override;
+
+    void notifyQueueSubmit(
+            uint64_t                  frameId) override;
+
+    void notifyQueuePresentBegin(
+            uint64_t                  frameId) override;
+
+    void notifyQueuePresentEnd(
+            uint64_t                  frameId,
+            VkResult                  status) override;
+
+    void notifyGpuExecutionBegin(
+            uint64_t                  frameId) override;
+
+    void notifyGpuExecutionEnd(
+            uint64_t                  frameId) override;
+
+    void notifyGpuPresentEnd(
+            uint64_t                  frameId) override;
+
+    void sleepAndBeginFrame(
+            uint64_t                  frameId,
+            double                    maxFrameRate) override;
+
+    void discardTimings() override;
+
+    DxvkLatencyStats getStatistics(
+            uint64_t                  frameId) override;
+
+  private:
+
+    dxvk::mutex               m_mutex;  // Guards m_frames and the valid frame range
+    dxvk::condition_variable  m_cond;   // Signalled on GPU execution begin and GPU present end
+
+    duration                  m_tolerance;  // Non-negative tolerance, converted from microseconds
+
+    double                    m_envFpsLimit = 0.0;  // Frame rate override from the environment; 0 if unset
+
+    std::array<DxvkLatencyFrameData, FrameCount> m_frames = { };  // Indexed by frameId % FrameCount
+    // Inclusive range of frame IDs that currently have valid entries
+    uint64_t m_validRangeBegin = 0u;
+    uint64_t m_validRangeEnd = 0u;
+    // sleep() locks m_mutex itself; initFrame() and findFrame() are called with it held
+    duration sleep(
+            uint64_t                  frameId,
+            double                    maxFrameRate);
+
+    DxvkLatencyFrameData* initFrame(
+            uint64_t                  frameId);
+
+    DxvkLatencyFrameData* findFrame(
+            uint64_t                  frameId);
+
+    duration computeFrameInterval(
+            double                    maxFrameRate);
+
+    static duration computeIntervalFromRate(
+            double                    frameRate);
+
+    static duration estimateTime(
+      const duration*                 frames,
+            size_t                    frameCount);
+
+  };
+
+}
diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp
index c939a7a41..8fbf20bb6 100644
--- a/src/dxvk/dxvk_options.cpp
+++ b/src/dxvk/dxvk_options.cpp
@@ -12,6 +12,8 @@ namespace dxvk {
     useRawSsbo            = config.getOption<Tristate>("dxvk.useRawSsbo",             Tristate::Auto);
     hud                   = config.getOption<std::string>("dxvk.hud", "");
     tearFree              = config.getOption<Tristate>("dxvk.tearFree",               Tristate::Auto);
+    latencySleep          = config.getOption<Tristate>("dxvk.latencySleep",           Tristate::Auto);
+    latencyTolerance      = config.getOption<int32_t> ("dxvk.latencyTolerance",       1000);
     hideIntegratedGraphics = config.getOption<bool>   ("dxvk.hideIntegratedGraphics", false);
     zeroMappedMemory      = config.getOption<bool>    ("dxvk.zeroMappedMemory",       false);
     allowFse              = config.getOption<bool>    ("dxvk.allowFse",               false);
diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h
index 0994f1d5f..dcbd52230 100644
--- a/src/dxvk/dxvk_options.h
+++ b/src/dxvk/dxvk_options.h
@@ -37,6 +37,12 @@ namespace dxvk {
     /// or FIFO_RELAXED (if false) present mode
     Tristate tearFree = Tristate::Auto;
 
+    /// Enables latency sleep
+    Tristate latencySleep = Tristate::Auto;
+
+    /// Latency tolerance, in microseconds
+    int32_t latencyTolerance = 0u;
+
     // Hides integrated GPUs if dedicated GPUs are
     // present. May be necessary for some games that
     // incorrectly assume monitor layouts.
diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build
index 9ab23dab2..789b911c8 100644
--- a/src/dxvk/meson.build
+++ b/src/dxvk/meson.build
@@ -90,6 +90,7 @@ dxvk_src = [
   'dxvk_graphics.cpp',
   'dxvk_image.cpp',
   'dxvk_instance.cpp',
+  'dxvk_latency_builtin.cpp',
   'dxvk_memory.cpp',
   'dxvk_meta_blit.cpp',
   'dxvk_meta_clear.cpp',