diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 1e43302099..8f21f61c8c 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -52,6 +52,7 @@ and this project adheres to - Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886) - Removed several memory allocations that happened during inference. On a test scene, this reduced the amount of memory allocated by approximately 25%. (#4887) +- Removed several memory allocations that happened during inference with discrete actions. (#4922) - Properly catch permission errors when writing timer files. (#4921) #### ml-agents / ml-agents-envs / gym-unity (Python) diff --git a/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs b/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs index 3952786306..403d1b666b 100644 --- a/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs +++ b/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs @@ -1,4 +1,3 @@ -using System; using System.Collections.Generic; using System.Linq; using Unity.MLAgents.Inference.Utils; @@ -55,62 +54,26 @@ internal class DiscreteActionOutputApplier : TensorApplier.IApplier { readonly int[] m_ActionSize; readonly Multinomial m_Multinomial; - readonly ITensorAllocator m_Allocator; readonly ActionSpec m_ActionSpec; + readonly int[] m_StartActionIndices; + readonly float[] m_CdfBuffer; + public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator) { m_ActionSize = actionSpec.BranchSizes; m_Multinomial = new Multinomial(seed); - m_Allocator = allocator; m_ActionSpec = actionSpec; + m_StartActionIndices = Utilities.CumSum(m_ActionSize); + + // Scratch space for computing the cumulative distribution function. + // In order to reuse it, make it the size of the largest branch. 
+ var largestBranch = Mathf.Max(m_ActionSize); + m_CdfBuffer = new float[largestBranch]; } public void Apply(TensorProxy tensorProxy, IList actionIds, Dictionary lastActions) { - //var tensorDataProbabilities = tensorProxy.Data as float[,]; - var idActionPairList = actionIds as List ?? actionIds.ToList(); - var batchSize = idActionPairList.Count; - var actionValues = new float[batchSize, m_ActionSize.Length]; - var startActionIndices = Utilities.CumSum(m_ActionSize); - for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++) - { - var nBranchAction = m_ActionSize[actionIndex]; - var actionProbs = new TensorProxy() - { - valueType = TensorProxy.TensorType.FloatingPoint, - shape = new long[] { batchSize, nBranchAction }, - data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction)) - }; - - for (var batchIndex = 0; batchIndex < batchSize; batchIndex++) - { - for (var branchActionIndex = 0; - branchActionIndex < nBranchAction; - branchActionIndex++) - { - actionProbs.data[batchIndex, branchActionIndex] = - tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex]; - } - } - - var outputTensor = new TensorProxy() - { - valueType = TensorProxy.TensorType.FloatingPoint, - shape = new long[] { batchSize, 1 }, - data = m_Allocator.Alloc(new TensorShape(batchSize, 1)) - }; - - Eval(actionProbs, outputTensor, m_Multinomial); - - for (var ii = 0; ii < batchSize; ii++) - { - actionValues[ii, actionIndex] = outputTensor.data[ii, 0]; - } - actionProbs.data.Dispose(); - outputTensor.data.Dispose(); - } - var agentIndex = 0; for (var i = 0; i < actionIds.Count; i++) { @@ -126,7 +89,8 @@ public void Apply(TensorProxy tensorProxy, IList actionIds, Dictionary actionIds, Dictionary - /// Draw samples from a multinomial distribution based on log-probabilities specified - /// in tensor src. The samples will be saved in the dst tensor. 
+ /// Compute the cumulative distribution function for a given agent's action + /// given the log-probabilities. + /// The results are stored in m_CdfBuffer, which is sized to hold the largest branch; only the first branchSize entries are written. /// - /// 2-D tensor with shape batch_size x num_classes - /// Allocated tensor with size batch_size x num_samples - /// Multinomial object used to sample values - /// - /// Multinomial doesn't support integer tensors - /// - /// Issue with tensor shape or type - /// - /// At least one of the tensors is not allocated - /// - public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial) + /// + /// Index of the agent being considered + /// Offset into the tensor's channel. + /// + internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize) { - if (src.DataType != typeof(float)) + // Find the class maximum + var maxProb = float.NegativeInfinity; + for (var cls = 0; cls < branchSize; ++cls) { - throw new NotImplementedException("Only float tensors are currently supported"); + maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb); } - if (src.valueType != dst.valueType) + // Accumulate the exponentiated, max-shifted log probabilities into an unnormalized CDF + var sumProb = 0.0f; + for (var cls = 0; cls < branchSize; ++cls) { - throw new ArgumentException( - "Source and destination tensors have different types!"); - } - - if (src.data == null || dst.data == null) - { - throw new ArgumentNullException(); - } - - if (src.data.batch != dst.data.batch) - { - throw new ArgumentException("Batch size for input and output data is different!"); - } - - var cdf = new float[src.data.channels]; - - for (var batch = 0; batch < src.data.batch; ++batch) - { - // Find the class maximum - var maxProb = float.NegativeInfinity; - for (var cls = 0; cls < src.data.channels; ++cls) - { - maxProb = Mathf.Max(src.data[batch, cls], maxProb); - } - - // Sum the log probabilities and compute CDF - var sumProb = 0.0f; - for (var cls = 0; cls < 
src.data.channels; ++cls) - { - sumProb += Mathf.Exp(src.data[batch, cls] - maxProb); - cdf[cls] = sumProb; - } - - // Generate the samples - for (var sample = 0; sample < dst.data.channels; ++sample) - { - dst.data[batch, sample] = multinomial.Sample(cdf); - } + sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb); + m_CdfBuffer[cls] = sumProb; } } } diff --git a/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs b/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs index a2c06bcdce..41603dd3ba 100644 --- a/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs +++ b/com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs @@ -32,10 +32,11 @@ public Multinomial(int seed) /// to be monotonic (always increasing). If the CMF is scaled, then the last entry in /// the array will be 1.0. /// - /// A sampled index from the CMF ranging from 0 to cmf.Length-1. - public int Sample(float[] cmf) + /// The number of possible branches, i.e. the effective size of the cmf array. + /// A sampled index from the CMF ranging from 0 to branchSize-1. + public int Sample(float[] cmf, int branchSize) { - var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1]; + var p = (float)m_Random.NextDouble() * cmf[branchSize - 1]; var cls = 0; while (cmf[cls] < p) { @@ -44,5 +45,15 @@ public int Sample(float[] cmf) return cls; } + + /// + /// Samples from the Multinomial distribution defined by the provided cumulative + /// mass function. + /// + /// A sampled index from the CMF ranging from 0 to cmf.Length-1. 
+ public int Sample(float[] cmf) + { + return Sample(cmf, cmf.Length); + } } } diff --git a/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs b/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs index d2f28f7eca..b431f4f740 100644 --- a/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs +++ b/com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs @@ -1,193 +1,47 @@ -using System; +using System.Collections.Generic; using Unity.Barracuda; using NUnit.Framework; -using UnityEngine; +using Unity.MLAgents.Actuators; using Unity.MLAgents.Inference; -using Unity.MLAgents.Inference.Utils; namespace Unity.MLAgents.Tests { public class DiscreteActionOutputApplierTest { [Test] - public void TestEvalP() + public void TestDiscreteApply() { - var m = new Multinomial(2018); + var actionSpec = ActionSpec.MakeDiscrete(3, 2); + const float smallLogProb = -1000.0f; + const float largeLogProb = -1.0f; - var src = new TensorProxy - { - data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - var dst = new TensorProxy - { - data = new Tensor(1, 3), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - DiscreteActionOutputApplier.Eval(src, dst, m); - - float[] reference = { 2, 2, 1 }; - for (var i = 0; i < dst.data.length; i++) - { - Assert.AreEqual(reference[i], dst.data[i]); - ++i; - } - } - - [Test] - public void TestEvalLogits() - { - var m = new Multinomial(2018); - - var src = new TensorProxy + var logProbs = new TensorProxy { data = new Tensor( - 1, - 3, - new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - var dst = new TensorProxy - { - data = new Tensor(1, 3), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - DiscreteActionOutputApplier.Eval(src, dst, m); - - float[] reference = { 2, 2, 2 }; - for (var i = 0; i < dst.data.length; i++) - { - 
Assert.AreEqual(reference[i], dst.data[i]); - ++i; - } - } - - [Test] - public void TestEvalBatching() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - data = new Tensor(2, 3, new[] - { - Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50, - Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25 - }), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - var dst = new TensorProxy - { - data = new Tensor(2, 3), - valueType = TensorProxy.TensorType.FloatingPoint - }; - - DiscreteActionOutputApplier.Eval(src, dst, m); - - float[] reference = { 2, 2, 2, 0, 1, 0 }; - for (var i = 0; i < dst.data.length; i++) - { - Assert.AreEqual(reference[i], dst.data[i]); - ++i; - } - } - - [Test] - public void TestSrcInt() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - valueType = TensorProxy.TensorType.Integer - }; - - Assert.Throws( - () => DiscreteActionOutputApplier.Eval(src, null, m)); - } - - [Test] - public void TestDstInt() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint - }; - - var dst = new TensorProxy - { - valueType = TensorProxy.TensorType.Integer - }; - - Assert.Throws( - () => DiscreteActionOutputApplier.Eval(src, dst, m)); - } - - [Test] - public void TestSrcDataNull() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint - }; - - var dst = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint - }; - - Assert.Throws( - () => DiscreteActionOutputApplier.Eval(src, dst, m)); - } - - [Test] - public void TestDstDataNull() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint, - data = new Tensor(0, 1) - }; - - var dst = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint - }; - - Assert.Throws( - () => DiscreteActionOutputApplier.Eval(src, dst, m)); - 
} - - [Test] - public void TestUnequalBatchSize() - { - var m = new Multinomial(2018); - - var src = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint, - data = new Tensor(1, 1) - }; - - var dst = new TensorProxy - { - valueType = TensorProxy.TensorType.FloatingPoint, - data = new Tensor(2, 1) - }; - - Assert.Throws( - () => DiscreteActionOutputApplier.Eval(src, dst, m)); + 2, + 5, + new[] + { + smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0 + smallLogProb, largeLogProb, // Agent 0, branch 1 + largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0 + largeLogProb, smallLogProb, // Agent 1, branch 1 + }), + valueType = TensorProxy.TensorType.FloatingPoint + }; + + var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null); + var agentIds = new List { 42, 1337 }; + var actionBuffers = new Dictionary(); + actionBuffers[42] = new ActionBuffers(actionSpec); + actionBuffers[1337] = new ActionBuffers(actionSpec); + + applier.Apply(logProbs, agentIds, actionBuffers); + Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]); + Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]); + + Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]); + Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]); } } }