Skip to content

Commit bf77438

Browse files
author
Ray Smith
committed
Updated comments on RemapOutputs
1 parent 0382222 commit bf77438

13 files changed

+38
-49
lines changed

lstm/fullyconnected.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,9 @@ int FullyConnected::InitWeights(float range, TRand* randomizer) {
8484
return num_weights_;
8585
}
8686

87-
// Changes the number of outputs to the size of the given code_map, copying
88-
// the old weight matrix entries for each output from code_map[output] where
89-
// non-negative, and uses the mean (over all outputs) of the existing weights
90-
// for all outputs with negative code_map entries. Returns the new number of
91-
// weights. Only operates on Softmax layers with old_no outputs.
87+
// Recursively searches the network for softmaxes with old_no outputs,
88+
// and remaps their outputs according to code_map. See network.h for details.
89+
9290
int FullyConnected::RemapOutputs(int old_no, const std::vector<int>& code_map) {
9391
if (type_ == NT_SOFTMAX && no_ == old_no) {
9492
num_weights_ = weights_.RemapOutputs(code_map);

lstm/fullyconnected.h

+2-5
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,8 @@ class FullyConnected : public Network {
6868
// Sets up the network for training. Initializes weights using weights of
6969
// scale `range` picked according to the random number generator `randomizer`.
7070
virtual int InitWeights(float range, TRand* randomizer);
71-
// Changes the number of outputs to the size of the given code_map, copying
72-
// the old weight matrix entries for each output from code_map[output] where
73-
// non-negative, and uses the mean (over all outputs) of the existing weights
74-
// for all outputs with negative code_map entries. Returns the new number of
75-
// weights. Only operates on Softmax layers with old_no outputs.
71+
// Recursively searches the network for softmaxes with old_no outputs,
72+
// and remaps their outputs according to code_map. See network.h for details.
7673
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
7774

7875
// Converts a float network to an int network.

lstm/lstm.cpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,8 @@ int LSTM::InitWeights(float range, TRand* randomizer) {
140140
return num_weights_;
141141
}
142142

143-
// Changes the number of outputs to the size of the given code_map, copying
144-
// the old weight matrix entries for each output from code_map[output] where
145-
// non-negative, and uses the mean (over all outputs) of the existing weights
146-
// for all outputs with negative code_map entries. Returns the new number of
147-
// weights. Only operates on Softmax layers with old_no outputs.
143+
// Recursively searches the network for softmaxes with old_no outputs,
144+
// and remaps their outputs according to code_map. See network.h for details.
148145
int LSTM::RemapOutputs(int old_no, const std::vector<int>& code_map) {
149146
if (softmax_ != NULL) {
150147
num_weights_ -= softmax_->num_weights();

lstm/lstm.h

+2-5
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,8 @@ class LSTM : public Network {
7676
// Sets up the network for training. Initializes weights using weights of
7777
// scale `range` picked according to the random number generator `randomizer`.
7878
virtual int InitWeights(float range, TRand* randomizer);
79-
// Changes the number of outputs to the size of the given code_map, copying
80-
// the old weight matrix entries for each output from code_map[output] where
81-
// non-negative, and uses the mean (over all outputs) of the existing weights
82-
// for all outputs with negative code_map entries. Returns the new number of
83-
// weights. Only operates on Softmax layers with old_no outputs.
79+
// Recursively searches the network for softmaxes with old_no outputs,
80+
// and remaps their outputs according to code_map. See network.h for details.
8481
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
8582

8683
// Converts a float network to an int network.

lstm/lstmtrainer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char* filename,
135135
filename == old_traineddata) {
136136
return true; // Normal checkpoint load complete.
137137
}
138-
tprintf("Code range changed from %d to %d!!\n", network_->NumOutputs(),
138+
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
139139
recoder_.code_range());
140140
if (old_traineddata == nullptr || *old_traineddata == '\0') {
141141
tprintf("Must supply the old traineddata for code conversion!\n");

lstm/lstmtrainer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ class LSTMTrainer : public LSTMRecognizer {
9999

100100
// Tries to deserialize a trainer from the given file and silently returns
101101
// false in case of failure. If old_traineddata is not null, then it is
102-
// assumed that the character set is to be re-mapped from old_traininddata to
102+
// assumed that the character set is to be re-mapped from old_traineddata to
103103
// the new, with consequent change in weight matrices etc.
104104
bool TryLoadingCheckpoint(const char* filename, const char* old_traineddata);
105105

lstm/network.h

+11-5
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,17 @@ class Network {
172172
// and should not be deleted by any of the networks.
173173
// Returns the number of weights initialized.
174174
virtual int InitWeights(float range, TRand* randomizer);
175-
// Changes the number of outputs to the size of the given code_map, copying
176-
// the old weight matrix entries for each output from code_map[output] where
177-
// non-negative, and uses the mean (over all outputs) of the existing weights
178-
// for all outputs with negative code_map entries. Returns the new number of
179-
// weights. Only operates on Softmax layers with old_no outputs.
175+
// Changes the number of outputs to the outside world to the size of the given
176+
// code_map. Recursively searches the entire network for Softmax layers that
177+
// have exactly old_no outputs, and operates only on those, leaving all others
178+
// unchanged. This enables networks with multiple output layers to get all
179+
// their softmaxes updated, but if an internal layer uses one of those
180+
// softmaxes for input, then the inputs will effectively be scrambled.
181+
// TODO(rays) Fix this before any such network is implemented.
182+
// The softmaxes are resized by copying the old weight matrix entries for each
183+
// output from code_map[output] where non-negative, and using the mean (over
184+
// all outputs) of the existing weights for all outputs with negative code_map
185+
// entries. Returns the new number of weights.
180186
virtual int RemapOutputs(int old_no, const std::vector<int>& code_map) {
181187
return 0;
182188
}

lstm/plumbing.cpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,8 @@ int Plumbing::InitWeights(float range, TRand* randomizer) {
5757
return num_weights_;
5858
}
5959

60-
// Changes the number of outputs to the size of the given code_map, copying
61-
// the old weight matrix entries for each output from code_map[output] where
62-
// non-negative, and uses the mean (over all outputs) of the existing weights
63-
// for all outputs with negative code_map entries. Returns the new number of
64-
// weights. Only operates on Softmax layers with old_no outputs.
60+
// Recursively searches the network for softmaxes with old_no outputs,
61+
// and remaps their outputs according to code_map. See network.h for details.
6562
int Plumbing::RemapOutputs(int old_no, const std::vector<int>& code_map) {
6663
num_weights_ = 0;
6764
for (int i = 0; i < stack_.size(); ++i) {

lstm/plumbing.h

+2-5
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,8 @@ class Plumbing : public Network {
5757
// and should not be deleted by any of the networks.
5858
// Returns the number of weights initialized.
5959
virtual int InitWeights(float range, TRand* randomizer);
60-
// Changes the number of outputs to the size of the given code_map, copying
61-
// the old weight matrix entries for each output from code_map[output] where
62-
// non-negative, and uses the mean (over all outputs) of the existing weights
63-
// for all outputs with negative code_map entries. Returns the new number of
64-
// weights. Only operates on Softmax layers with old_no outputs.
60+
// Recursively searches the network for softmaxes with old_no outputs,
61+
// and remaps their outputs according to code_map. See network.h for details.
6562
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
6663

6764
// Converts a float network to an int network.

lstm/series.cpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,8 @@ int Series::InitWeights(float range, TRand* randomizer) {
6060
return num_weights_;
6161
}
6262

63-
// Changes the number of outputs to the size of the given code_map, copying
64-
// the old weight matrix entries for each output from code_map[output] where
65-
// non-negative, and uses the mean (over all outputs) of the existing weights
66-
// for all outputs with negative code_map entries. Returns the new number of
67-
// weights. Only operates on Softmax layers with old_no outputs.
63+
// Recursively searches the network for softmaxes with old_no outputs,
64+
// and remaps their outputs according to code_map. See network.h for details.
6865
int Series::RemapOutputs(int old_no, const std::vector<int>& code_map) {
6966
num_weights_ = 0;
7067
tprintf("Num (Extended) outputs,weights in Series:\n");

lstm/series.h

+2-5
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,8 @@ class Series : public Plumbing {
4646
// scale `range` picked according to the random number generator `randomizer`.
4747
// Returns the number of weights initialized.
4848
virtual int InitWeights(float range, TRand* randomizer);
49-
// Changes the number of outputs to the size of the given code_map, copying
50-
// the old weight matrix entries for each output from code_map[output] where
51-
// non-negative, and uses the mean (over all outputs) of the existing weights
52-
// for all outputs with negative code_map entries. Returns the new number of
53-
// weights. Only operates on Softmax layers with old_no outputs.
49+
// Recursively searches the network for softmaxes with old_no outputs,
50+
// and remaps their outputs according to code_map. See network.h for details.
5451
int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
5552

5653
// Sets needs_to_backprop_ to needs_backprop and returns true if

lstm/weightmatrix.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,10 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam,
6161
// the old weight matrix entries for each output from code_map[output] where
6262
// non-negative, and uses the mean (over all outputs) of the existing weights
6363
// for all outputs with negative code_map entries. Returns the new number of
64-
// weights.
64+
// weights. Can be used to change the character set addressed by an output
65+
// softmax.
66+
// TODO(rays) A RemapInputs would also be useful, so a change can be made
67+
// in the middle of a network.
6568
int WeightMatrix::RemapOutputs(const std::vector<int>& code_map) {
6669
GENERIC_2D_ARRAY<double> old_wf(wf_);
6770
int old_no = wf_.dim1();

lstm/weightmatrix.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,10 @@ class WeightMatrix {
7474
// the old weight matrix entries for each output from code_map[output] where
7575
// non-negative, and uses the mean (over all outputs) of the existing weights
7676
// for all outputs with negative code_map entries. Returns the new number of
77-
// weights.
77+
// weights. Can be used to change the character set addressed by an output
78+
// softmax.
79+
// TODO(rays) A RemapInputs would also be useful, so a change can be made
80+
// in the middle of a network.
7881
int RemapOutputs(const std::vector<int>& code_map);
7982

8083
// Converts a float network to an int network. Each set of input weights that

0 commit comments

Comments
 (0)