Skip to content

Commit 53403b5

Browse files
cjcullenniemeyer
authored andcommitted
Optimize cases with long potential simple_keys (#555)
This change introduces an index to lookup token numbers referenced by simple_keys in O(1), thus significantly reducing the performance impact of certain abusively constructed snippets. When we build up the simple_keys stack, we count on the (formerly named) staleness check to catch errors where a simple key is required but would be > 1024 chars or span lines. The previous simplification that searches the stack from the top can go 1024 keys deep before finding a "stale" key and stopping. I added a test that shows that this consumes ~3s per 1MB of document size.
1 parent 1f64d61 commit 53403b5

File tree

3 files changed

+26
-27
lines changed

3 files changed

+26
-27
lines changed

limit_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ var limitTests = []struct {
3939
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
4040
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
4141
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
42+
{name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))},
4243
}
4344

4445
func (s *S) TestLimits(c *C) {
@@ -92,6 +93,10 @@ func BenchmarkDeepFlow(b *testing.B) {
9293
benchmark(b, "1000kb slice nested in maps at max-depth")
9394
}
9495

96+
func Benchmark1000KBMaxDepthNested(b *testing.B) {
97+
benchmark(b, "1000kb of 10000-nested lines")
98+
}
99+
95100
func benchmark(b *testing.B, name string) {
96101
for _, t := range limitTests {
97102
if t.name != name {

scannerc.go

+20-27
Original file line numberDiff line numberDiff line change
@@ -626,32 +626,18 @@ func trace(args ...interface{}) func() {
626626
func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
627627
// While we need more tokens to fetch, do it.
628628
for {
629-
// Check if we really need to fetch more tokens.
630-
need_more_tokens := false
631-
632-
if parser.tokens_head == len(parser.tokens) {
633-
// Queue is empty.
634-
need_more_tokens = true
635-
} else {
636-
// Check if any potential simple key may occupy the head position.
637-
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
638-
simple_key := &parser.simple_keys[i]
639-
if simple_key.token_number < parser.tokens_parsed {
640-
break
641-
}
642-
if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
643-
return false
644-
} else if valid && simple_key.token_number == parser.tokens_parsed {
645-
need_more_tokens = true
646-
break
647-
}
629+
if parser.tokens_head != len(parser.tokens) {
630+
// If queue is non-empty, check if any potential simple key may
631+
// occupy the head position.
632+
head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
633+
if !ok {
634+
break
635+
} else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
636+
return false
637+
} else if !valid {
638+
break
648639
}
649640
}
650-
651-
// We are finished.
652-
if !need_more_tokens {
653-
break
654-
}
655641
// Fetch the next token.
656642
if !yaml_parser_fetch_next_token(parser) {
657643
return false
@@ -883,6 +869,7 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
883869
return false
884870
}
885871
parser.simple_keys[len(parser.simple_keys)-1] = simple_key
872+
parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
886873
}
887874
return true
888875
}
@@ -897,9 +884,10 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
897884
"while scanning a simple key", parser.simple_keys[i].mark,
898885
"could not find expected ':'")
899886
}
887+
// Remove the key from the stack.
888+
parser.simple_keys[i].possible = false
889+
delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
900890
}
901-
// Remove the key from the stack.
902-
parser.simple_keys[i].possible = false
903891
return true
904892
}
905893

@@ -930,7 +918,9 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
930918
func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
931919
if parser.flow_level > 0 {
932920
parser.flow_level--
933-
parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1]
921+
last := len(parser.simple_keys) - 1
922+
delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
923+
parser.simple_keys = parser.simple_keys[:last]
934924
}
935925
return true
936926
}
@@ -1007,6 +997,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
1007997
// Initialize the simple key stack.
1008998
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
1009999

1000+
parser.simple_keys_by_tok = make(map[int]int)
1001+
10101002
// A simple key is allowed at the beginning of the stream.
10111003
parser.simple_key_allowed = true
10121004

@@ -1310,6 +1302,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
13101302

13111303
// Remove the simple key.
13121304
simple_key.possible = false
1305+
delete(parser.simple_keys_by_tok, simple_key.token_number)
13131306

13141307
// A simple key cannot follow another simple key.
13151308
parser.simple_key_allowed = false

yamlh.go

+1
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ type yaml_parser_t struct {
579579

580580
simple_key_allowed bool // May a simple key occur at the current position?
581581
simple_keys []yaml_simple_key_t // The stack of simple keys.
582+
simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number
582583

583584
// Parser stuff
584585

0 commit comments

Comments
 (0)