diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 1e43302099..8f21f61c8c 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -52,6 +52,7 @@ and this project adheres to
 - Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
 - Removed several memory allocations that happened during inference. On a test scene, this
   reduced the amount of memory allocated by approximately 25%. (#4887)
+- Removed several memory allocations that happened during inference with discrete actions. (#4922)
 - Properly catch permission errors when writing timer files. (#4921)
 
 #### ml-agents / ml-agents-envs / gym-unity (Python)
diff --git a/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs b/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
index 3952786306..403d1b666b 100644
--- a/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
+++ b/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
@@ -1,4 +1,3 @@
-using System;
 using System.Collections.Generic;
 using System.Linq;
 using Unity.MLAgents.Inference.Utils;
@@ -55,62 +54,26 @@ internal class DiscreteActionOutputApplier : TensorApplier.IApplier
     {
         readonly int[] m_ActionSize;
         readonly Multinomial m_Multinomial;
-        readonly ITensorAllocator m_Allocator;
         readonly ActionSpec m_ActionSpec;
+        readonly int[] m_StartActionIndices;
+        readonly float[] m_CdfBuffer;
+
 
         public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
         {
             m_ActionSize = actionSpec.BranchSizes;
             m_Multinomial = new Multinomial(seed);
-            m_Allocator = allocator;
             m_ActionSpec = actionSpec;
+            m_StartActionIndices = Utilities.CumSum(m_ActionSize); // Per-branch channel offsets, computed once instead of on every Apply() call.
+
+            // Scratch space for the cumulative distribution function of a single branch.
+            // It is sized for the largest branch so that one buffer can be reused for every branch.
+            var largestBranch = Mathf.Max(m_ActionSize);
+            m_CdfBuffer = new float[largestBranch];
         }
 
         public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
         {
-            //var tensorDataProbabilities = tensorProxy.Data as float[,];
-            var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
-            var batchSize = idActionPairList.Count;
-            var actionValues = new float[batchSize, m_ActionSize.Length];
-            var startActionIndices = Utilities.CumSum(m_ActionSize);
-            for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
-            {
-                var nBranchAction = m_ActionSize[actionIndex];
-                var actionProbs = new TensorProxy()
-                {
-                    valueType = TensorProxy.TensorType.FloatingPoint,
-                    shape = new long[] { batchSize, nBranchAction },
-                    data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
-                };
-
-                for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
-                {
-                    for (var branchActionIndex = 0;
-                         branchActionIndex < nBranchAction;
-                         branchActionIndex++)
-                    {
-                        actionProbs.data[batchIndex, branchActionIndex] =
-                            tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
-                    }
-                }
-
-                var outputTensor = new TensorProxy()
-                {
-                    valueType = TensorProxy.TensorType.FloatingPoint,
-                    shape = new long[] { batchSize, 1 },
-                    data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
-                };
-
-                Eval(actionProbs, outputTensor, m_Multinomial);
-
-                for (var ii = 0; ii < batchSize; ii++)
-                {
-                    actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
-                }
-                actionProbs.data.Dispose();
-                outputTensor.data.Dispose();
-            }
-
             var agentIndex = 0;
             for (var i = 0; i < actionIds.Count; i++)
             {
@@ -126,7 +89,8 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int,
                     var discreteBuffer = actionBuffer.DiscreteActions;
                     for (var j = 0; j < m_ActionSize.Length; j++)
                     {
-                        discreteBuffer[j] = (int)actionValues[agentIndex, j];
+                        ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
+                        discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
                     }
                 }
                 agentIndex++;
@@ -134,66 +98,29 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int,
         }
 
         /// <summary>
-        /// Draw samples from a multinomial distribution based on log-probabilities specified
-        /// in tensor src. The samples will be saved in the dst tensor.
+        /// Compute the unnormalized cumulative distribution function of one action branch
+        /// for a single agent, given the log-probabilities.
+        /// The first branchSize entries of m_CdfBuffer are overwritten; the buffer is sized for the largest branch.
         /// </summary>
-        /// <param name="src">2-D tensor with shape batch_size x num_classes</param>
-        /// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
-        /// <param name="multinomial">Multinomial object used to sample values</param>
-        /// <exception cref="NotImplementedException">
-        /// Multinomial doesn't support integer tensors
-        /// </exception>
-        /// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
-        /// <exception cref="ArgumentNullException">
-        /// At least one of the tensors is not allocated
-        /// </exception>
-        public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
+        /// <param name="logProbs">Tensor of log-probabilities, read as data[batch, channelOffset + cls].</param>
+        /// <param name="batch">Index of the agent being considered (first index into logProbs.data).</param>
+        /// <param name="channelOffset">Channel index at which this branch's log-probabilities start.</param>
+        /// <param name="branchSize">Number of classes in the branch, i.e. how many channels are read.</param>
+        internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
         {
-            if (src.DataType != typeof(float))
+            // Find the largest log-probability of the branch; it is subtracted below so Mathf.Exp never sees a positive argument.
+            var maxProb = float.NegativeInfinity;
+            for (var cls = 0; cls < branchSize; ++cls)
             {
-                throw new NotImplementedException("Only float tensors are currently supported");
+                maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
             }
 
-            if (src.valueType != dst.valueType)
+            // Running sum of exp(logProb - maxProb) gives the CDF; it is left unnormalized because Multinomial.Sample scales by the last entry.
+            var sumProb = 0.0f;
+            for (var cls = 0; cls < branchSize; ++cls)
             {
-                throw new ArgumentException(
-                    "Source and destination tensors have different types!");
-            }
-
-            if (src.data == null || dst.data == null)
-            {
-                throw new ArgumentNullException();
-            }
-
-            if (src.data.batch != dst.data.batch)
-            {
-                throw new ArgumentException("Batch size for input and output data is different!");
-            }
-
-            var cdf = new float[src.data.channels];
-
-            for (var batch = 0; batch < src.data.batch; ++batch)
-            {
-                // Find the class maximum
-                var maxProb = float.NegativeInfinity;
-                for (var cls = 0; cls < src.data.channels; ++cls)
-                {
-                    maxProb = Mathf.Max(src.data[batch, cls], maxProb);
-                }
-
-                // Sum the log probabilities and compute CDF
-                var sumProb = 0.0f;
-                for (var cls = 0; cls < src.data.channels; ++cls)
-                {
-                    sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
-                    cdf[cls] = sumProb;
-                }
-
-                // Generate the samples
-                for (var sample = 0; sample < dst.data.channels; ++sample)
-                {
-                    dst.data[batch, sample] = multinomial.Sample(cdf);
-                }
+                sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
+                m_CdfBuffer[cls] = sumProb;
             }
         }
     }
diff --git a/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs b/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs
index a2c06bcdce..41603dd3ba 100644
--- a/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs
+++ b/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs
@@ -32,10 +32,11 @@ public Multinomial(int seed)
         /// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
         /// the array will be 1.0.
         /// </param>
-        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
-        public int Sample(float[] cmf)
+        /// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
+        /// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
+        public int Sample(float[] cmf, int branchSize)
         {
-            var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
+            var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
             var cls = 0;
             while (cmf[cls] < p)
             {
@@ -44,5 +45,15 @@ public int Sample(float[] cmf)
 
             return cls;
         }
+
+        /// <summary>
+        /// Samples from the Multinomial distribution defined by the whole cumulative mass function array.
+        /// </summary>
+        /// <param name="cmf">Cumulative mass function; all cmf.Length entries are used (see the two-argument overload).</param>
+        /// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
+        public int Sample(float[] cmf)
+        {
+            return Sample(cmf, cmf.Length);
+        }
     }
 }
diff --git a/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs b/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs
index d2f28f7eca..b431f4f740 100644
--- a/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs
@@ -1,193 +1,47 @@
-using System;
+using System.Collections.Generic;
 using Unity.Barracuda;
 using NUnit.Framework;
-using UnityEngine;
+using Unity.MLAgents.Actuators;
 using Unity.MLAgents.Inference;
-using Unity.MLAgents.Inference.Utils;
 
 namespace Unity.MLAgents.Tests
 {
     public class DiscreteActionOutputApplierTest
     {
         [Test]
-        public void TestEvalP()
+        public void TestDiscreteApply()
         {
-            var m = new Multinomial(2018);
+            var actionSpec = ActionSpec.MakeDiscrete(3, 2); // Two branches: 3 choices, then 2 choices (5 channels in total).
+            const float smallLogProb = -1000.0f; // exp(smallLogProb - largeLogProb) underflows to 0 in float, so these classes are effectively never sampled.
+            const float largeLogProb = -1.0f; // The one class per branch that carries all of the probability mass.
 
-            var src = new TensorProxy
-            {
-                data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-
-            float[] reference = { 2, 2, 1 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalLogits()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
+            var logProbs = new TensorProxy
             {
                 data = new Tensor(
-                    1,
-                    3,
-                    new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            var dst = new TensorProxy
-            {
-                data = new Tensor(1, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-
-            float[] reference = { 2, 2, 2 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestEvalBatching()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                data = new Tensor(2, 3, new[]
-                {
-                    Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50,
-                    Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25
-                }),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            var dst = new TensorProxy
-            {
-                data = new Tensor(2, 3),
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            DiscreteActionOutputApplier.Eval(src, dst, m);
-
-            float[] reference = { 2, 2, 2, 0, 1, 0 };
-            for (var i = 0; i < dst.data.length; i++)
-            {
-                Assert.AreEqual(reference[i], dst.data[i]);
-                ++i;
-            }
-        }
-
-        [Test]
-        public void TestSrcInt()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-
-            Assert.Throws<NotImplementedException>(
-                () => DiscreteActionOutputApplier.Eval(src, null, m));
-        }
-
-        [Test]
-        public void TestDstInt()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.Integer
-            };
-
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestSrcDataNull()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestDstDataNull()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(0, 1)
-            };
-
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint
-            };
-
-            Assert.Throws<ArgumentNullException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
-        }
-
-        [Test]
-        public void TestUnequalBatchSize()
-        {
-            var m = new Multinomial(2018);
-
-            var src = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(1, 1)
-            };
-
-            var dst = new TensorProxy
-            {
-                valueType = TensorProxy.TensorType.FloatingPoint,
-                data = new Tensor(2, 1)
-            };
-
-            Assert.Throws<ArgumentException>(
-                () => DiscreteActionOutputApplier.Eval(src, dst, m));
+                    2,
+                    5,
+                    new[]
+                    {
+                        smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0
+                        smallLogProb, largeLogProb,               // Agent 0, branch 1
+                        largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0
+                        largeLogProb, smallLogProb,               // Agent 1, branch 1
+                    }),
+                valueType = TensorProxy.TensorType.FloatingPoint
+            };
+
+            var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null); // The allocator is no longer stored or used by the applier, so null is fine.
+            var agentIds = new List<int> { 42, 1337 };
+            var actionBuffers = new Dictionary<int, ActionBuffers>();
+            actionBuffers[42] = new ActionBuffers(actionSpec);
+            actionBuffers[1337] = new ActionBuffers(actionSpec);
+
+            applier.Apply(logProbs, agentIds, actionBuffers); // Samples one index per branch for each agent id and writes it into its ActionBuffers.
+            Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]);
+            Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]);
+
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]);
+            Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]);
         }
     }
 }