Skip to content

Commit a027813

Browse files
committed
Implement more TODOs in oci-validate code
I'm much happier with this interface now -- it can properly handle an image with attestations now, for example.
1 parent 2b04d9e commit a027813

File tree

4 files changed

+172
-75
lines changed

4 files changed

+172
-75
lines changed

helpers/oci-import.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
9898
else . end
9999
100100
| .mediaType //= media_type_oci_index # TODO index normalize function? just force this to be set/valid instead?
101-
| validate_oci_index
101+
| validate_oci_index({ indexPlatformsOptional: true })
102102
| validate_length(.manifests; 1) # TODO allow upstream attestation in the future?
103103
104104
# purge maintainer-provided URLs / annotations (https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L146-L147)
@@ -123,7 +123,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
123123
$build
124124
| .source.arches[.build.arch].platform
125125
)
126-
# TODO .manifests[1].platform ?
127126
128127
# inject our build annotations
129128
| .manifests[0].annotations += (
@@ -136,8 +135,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
136135
' "$file" | tee index.json.new
137136
mv -f index.json.new index.json
138137

139-
# TODO "crane validate" is definitely interesting here -- it essentially validates all the descriptors recursively, including diff_ids, but it only supports "remote" or "tarball" (which refers to the *old* "docker save" tarball format), so isn't useful here, but we need to do basically that exact work
140-
141138
# now that "index.json" represents the exact index we want to push, let's push it down into a blob and make a new appropriate "index.json" for "crane push"
142139
# TODO we probably want/need some "traverse/manipulate an OCI layout" helpers 😭
143140
mediaType="$(jq --raw-output '.mediaType' index.json)"
@@ -159,3 +156,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
159156
}
160157
| normalize_manifest
161158
' > index.json
159+
160+
# TODO move this further out
161+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-sbom.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ cd "$output"
3838
imageIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
3939
include "oci";
4040
include "validate";
41-
validate_oci_index
41+
validate_oci_index({ indexPlatformsOptional: true })
4242
| validate_length(.manifests; 1)
4343
| validate_IN(.manifests[0].mediaType; media_types_index)
4444
| .manifests[0].digest
@@ -91,7 +91,7 @@ done
9191
sbomIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
9292
include "oci";
9393
include "validate";
94-
validate_oci_index
94+
validate_oci_index({ indexPlatformsOptional: true })
9595
| validate_length(.manifests; 1)
9696
| validate_IN(.manifests[0].mediaType; media_types_index)
9797
| .manifests[0].digest
@@ -146,3 +146,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
146146
}
147147
| normalize_manifest
148148
' > index.json
149+
150+
# TODO move this further out
151+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-validate.sh

Lines changed: 101 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -Eeuo pipefail
33

4-
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, some media types, layer diff_ids, etc)
4+
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, media types, layer diff_ids, etc)
55

66
layout="$1"; shift
77

@@ -23,85 +23,131 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp '
2323
| empty
2424
' oci-layout
2525

26-
# TODO this is all rubbish; it needs more thought (the jq functions it invokes are pretty solid now though)
26+
# TODO (recursively?) validate subject descriptors in here somewhere 🤔
2727

2828
descriptor() {
2929
local file="$1"; shift # "blobs/sha256/xxx"
30-
echo "blob: $file"
31-
local digest="$1"; shift # "sha256:xxx"
32-
local size="$1"; shift # "123"
33-
local algo="${digest%%:*}" # sha256
34-
local hash="${digest#$algo:}" # xxx
35-
local diskSize
36-
[ "$algo" = 'sha256' ] # TODO error message
37-
diskSize="$(stat --dereference --format '%s' "$file")"
38-
[ "$size" = "$diskSize" ] # TODO error message
39-
"${algo}sum" <<<"$hash *$file" --check --quiet --strict -
30+
local desc; desc="$(cat)"
31+
local shell
32+
shell="$(jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
33+
include "validate";
34+
include "oci";
35+
validate_one
36+
| validate_oci_descriptor
37+
| (
38+
@sh "local algo=\(
39+
.digest
40+
| split(":")[0]
41+
| validate_IN(.; "sha256", "sha512") # TODO more algorithms? need more tools on the host
42+
)",
43+
44+
@sh "local data=\(
45+
if has("data") then
46+
.data
47+
else " " end # empty string is valid base64 (which we should validate), but spaces are not, so we can use a single space to detect "data not set"
48+
)",
49+
50+
empty
51+
)
52+
')"
53+
eval "$shell"
54+
local digest size dataDigest= dataSize=
55+
digest="$("${algo}sum" "$file" | cut -d' ' -f1)"
56+
digest="$algo:$digest"
57+
size="$(stat --dereference --format '%s' "$file")"
58+
if [ "$data" != ' ' ]; then
59+
dataDigest="$(base64 <<<"$data" -d | "${algo}sum" | cut -d' ' -f1)"
60+
dataDigest="$algo:$dataDigest"
61+
dataSize="$(base64 <<<"$data" -d | wc --bytes)"
62+
# TODO *technically* we could get clever here and pass `base64 -d` to something like `tee >(wc --bytes) >(dig="$(sha256sum | cut -d' ' -f1)" && echo "sha256:$dig" && false) > /dev/null` to avoid parsing the base64 twice, but then failure cases are less likely to be caught, so it's safer to simply redecode (and we can't decode into a variable because this might be binary data *and* bash will do newline munging in both directions)
63+
fi
64+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --arg digest "$digest" --arg size "$size" --arg dataDigest "$dataDigest" --arg dataSize "$dataSize" '
65+
include "validate";
66+
validate_one
67+
| validate_IN(.digest; $digest)
68+
| validate_IN(.size; $size | tonumber)
69+
| if has("data") then
70+
validate(.data;
71+
$digest == $dataDigest
72+
and $size == $dataSize
73+
; "(decoded) data has size \($dataSize) and digest \($dataDigest) (expected \($size) and \($digest))")
74+
else . end
75+
| empty
76+
'
4077
}
4178

42-
images() {
43-
echo "image: $*"
79+
# TODO validate config (diff_ids, history, platform - gotta carry *two* levels of descriptors for that, and decompress all the layers 🙊)
80+
# TODO validate provenance/SBOM layer contents?
81+
82+
image() {
83+
local file="$1"; shift
84+
echo "image: $file"
85+
local desc; desc="$(cat)"
86+
descriptor <<<"$desc" "$file"
4487
local shell
4588
shell="$(
46-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
89+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
4790
include "validate";
4891
include "oci";
49-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
50-
validate_length(.; $expected | tonumber)
51-
| map(validate_oci_image)
92+
validate_length(.; 2)
93+
| .[0] as $desc
94+
| .[1]
95+
| validate_oci_image({
96+
imageAttestation: IN($desc.annotations["vnd.docker.reference.type"]; "attestation-manifest"),
97+
})
98+
| if $desc then
99+
validate_IN(.mediaType; $desc.mediaType)
100+
| validate_IN(.artifactType; $desc.artifactType)
101+
else . end
52102
| (
53103
(
54-
.[].config, .[].layers[]
55-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
56-
# TODO data?
104+
.config, .layers[]
105+
| @sh "descriptor <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
57106
),
58107
59108
empty # trailing comma
60109
)
61-
' "$@"
110+
' /dev/stdin "$file"
62111
)"
63112
eval "$shell"
64113
}
65114

66-
# TODO pass descriptor values down so we can validate that they match (.mediaType, .artifactType, .platform across *two* levels index->manifest->config), similar to .data
67-
# TODO disallow urls completely?
68-
69-
indexes() {
70-
echo "index: $*"
115+
index() {
116+
local file="$1"; shift
117+
echo "index: $file"
118+
local desc; desc="$(cat)"
119+
if [ "$desc" != 'null' ]; then
120+
descriptor <<<"$desc" "$file"
121+
fi
71122
local shell
72123
shell="$(
73-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
124+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
74125
include "validate";
75126
include "oci";
76-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
77-
validate_length(.; $expected | tonumber)
78-
| map(validate_oci_index)
127+
validate_length(.; 2)
128+
| .[0] as $desc
129+
| .[1]
130+
| validate_oci_index({
131+
indexPlatformsOptional: (input_filename == "index.json"),
132+
})
133+
| if $desc then
134+
validate_IN(.mediaType; $desc.mediaType)
135+
| validate_IN(.artifactType; $desc.artifactType)
136+
else . end
137+
| .manifests[]
79138
| (
80-
(
81-
.[].manifests[]
82-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
83-
# TODO data?
84-
),
85-
86-
(
87-
[ .[].manifests[] | select(IN(.mediaType; media_types_image)) | .digest ]
88-
| if length > 0 then
89-
"images \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
90-
else empty end
91-
),
92-
93-
(
94-
[ .[].manifests[] | select(IN(.mediaType; media_types_index)) | .digest ]
95-
| if length > 0 then
96-
"indexes \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
97-
else empty end
98-
),
99-
100-
empty # trailing comma
101-
)
102-
' "$@"
139+
.mediaType
140+
| if IN(media_types_index) then
141+
"index"
142+
elif IN(media_types_image) then
143+
"image"
144+
else
145+
error("UNSUPPORTED MEDIA TYPE: \(.)")
146+
end
147+
) + @sh " <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
148+
' /dev/stdin "$file"
103149
)"
104150
eval "$shell"
105151
}
106152

107-
indexes index.json
153+
index <<<'null' index.json

oci.jq

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def validate_oci_digest:
165165
| {
166166
"sha256": [ 64 ],
167167
"sha512": [ 128 ],
168+
# TODO "blake3": [ 64 ], # https://github.com/opencontainers/image-spec/pull/1240
168169
} as $lengths
169170
| validate_IN($dig.algorithm; $lengths | keys[])
170171
| validate_length($dig.encoded; $lengths[$dig.algorithm][])
@@ -175,6 +176,7 @@ def validate_oci_annotations_haver:
175176
if has("annotations") then
176177
validate(.annotations; type == "object"; "if present, annotations must be an object")
177178
| validate(.annotations[]; type == "string"; "annotation values must be strings")
179+
# TODO validate that keys are not bare words (reverse DNS or vendor/bar)
178180
else . end
179181
;
180182

@@ -191,7 +193,11 @@ def validate_oci_descriptor:
191193
| validate(.size; . == floor; "size must be whole")
192194
| validate(.size; . == ceil; "size must be whole")
193195

194-
# TODO urls?
196+
| if has("urls") then
197+
validate(.urls; type == "array")
198+
| validate(.urls[]; type == "string")
199+
| validate_length(.urls; 0) # TODO this intentionally contradicts the above lines -- are there cases where we should allow urls?
200+
else . end
195201

196202
| validate_oci_annotations_haver
197203

@@ -205,7 +211,9 @@ def validate_oci_descriptor:
205211
# someday, maybe we can validate that .data matches .digest here (needs more jq functionality, including and especially the ability to deal with non-UTF8 binary data from base64 and perform sha256 over it)
206212
else . end
207213

208-
# TODO artifactType?
214+
| if has("artifactType") then
215+
validate(.artifactType; type == "string")
216+
else . end
209217

210218
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-index.md#image-index-property-descriptions
211219
| if has("platform") then
@@ -240,47 +248,87 @@ def validate_oci_subject_haver:
240248
;
241249

242250
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-index.md
243-
def validate_oci_index:
251+
def validate_oci_index($opt):
244252
validate_IN(type; "object")
245253
| validate_IN(.schemaVersion; 2)
246-
| validate_IN(.mediaType; media_types_index) # TODO allow "null" here too? (https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)
247-
# TODO artifactType?
254+
| validate_IN(.mediaType; media_types_index)
255+
| if has("artifactType") then
256+
validate(.artifactType; type == "string")
257+
| validate_IN(.artifactType; null) # TODO acceptable values? (this check intentionally contradicts the one above so artifactType generates an error)
258+
else . end
248259
| validate(.manifests[];
249260
validate_oci_descriptor
250261
| validate_IN(.mediaType; media_types_index, media_types_image)
251262
| validate(.size; . > 2; "manifest size must be at *least* big enough for {} plus *some* content")
252263
# https://github.com/opencontainers/distribution-spec/pull/293#issuecomment-1452780554
253264
| validate(.size; . <= 4 * 1024 * 1024; "manifest size must be 4MiB (\(4 * 1024 * 1024)) or less")
265+
266+
# slightly stricter enforcement than "validate_oci_descriptor" by default
267+
| if $opt.indexPlatformsOptional then . else
268+
validate(.platform; type == "object")
269+
end
270+
271+
# https://github.com/moby/buildkit/blob/c6145c2423de48f891862ac02f9b2653864d3c9e/docs/attestations/attestation-storage.md
272+
| if .annotations | has("vnd.docker.reference.type") or has("vnd.docker.reference.digest") then
273+
validate_IN(.mediaType; media_type_oci_image)
274+
| validate_IN(.artifactType; null, "application/vnd.docker.attestation.manifest.v1+json") # https://github.com/moby/buildkit/pull/5573/files#r2069525281
275+
| validate_IN(.annotations["vnd.docker.reference.type"]; "attestation-manifest")
276+
| validate(.annotations["vnd.docker.reference.digest"]; validate_oci_digest)
277+
| validate_IN(.platform.os; "unknown")
278+
| validate_IN(.platform.architecture; "unknown")
279+
else
280+
validate_IN(.artifactType; null)
281+
end
254282
)
255-
# TODO if any manifest has "vnd.docker.reference.digest", validate the subject exists in the list
283+
| if any(.manifests[].annotations; has("vnd.docker.reference.digest")) then
284+
[ .manifests[].digest ] as $digests
285+
| validate_IN(.manifests[].annotations["vnd.docker.reference.digest"]; null, $digests[])
286+
else . end
256287
| validate_oci_subject_haver
257288
| validate_oci_annotations_haver
258289
;
290+
def validate_oci_index: validate_oci_index({});
259291

260292
# https://github.com/opencontainers/image-spec/blob/v1.1.1/manifest.md
261-
def validate_oci_image:
293+
def validate_oci_image($opt):
262294
validate_IN(type; "object")
263295
| validate_IN(.schemaVersion; 2)
264-
| validate_IN(.mediaType; media_types_image) # TODO allow "null" here too? (https://github.com/opencontainers/image-spec/security/advisories/GHSA-77vh-xpmg-72qh)
265-
# TODO artifactType (but only selectively / certain values)
296+
| validate_IN(.mediaType; media_types_image)
297+
| if has("artifactType") then
298+
validate(.artifactType; type == "string")
299+
| validate_IN(.artifactType;
300+
if $opt.imageAttestation then
301+
"application/vnd.docker.attestation.manifest.v1+json" # https://github.com/moby/buildkit/pull/5573/files#r2069525281
302+
else null end # (this check intentionally contradicts the one above so artifactType normally generates an error)
303+
)
304+
else . end
266305
| validate(.config;
267306
validate_oci_descriptor
268307
| validate(.size; . >= 2; "config must be at *least* big enough for {}")
269308
| validate_IN(.mediaType; media_types_config)
309+
| validate_IN(.artifactType; null)
270310
)
271311
| validate(.layers[];
272312
validate_oci_descriptor
273-
| validate_IN(.mediaType; media_types_layer) # TODO allow "application/vnd.in-toto+json" and friends, but selectively 🤔
313+
| if $opt.imageAttestation then
314+
# https://github.com/moby/buildkit/blob/c6145c2423de48f891862ac02f9b2653864d3c9e/docs/attestations/attestation-storage.md
315+
validate_IN(.mediaType; "application/vnd.in-toto+json")
316+
| validate_IN(.annotations["in-toto.io/predicate-type"];
317+
"https://slsa.dev/provenance/v0.2",
318+
"https://spdx.dev/Document",
319+
empty # trailing comma
320+
)
321+
else
322+
validate_IN(.mediaType; media_types_layer)
323+
end
324+
| validate_IN(.artifactType; null)
274325
)
275326
| validate_oci_subject_haver
276327
| validate_oci_annotations_haver
277328
;
329+
def validate_oci_image: validate_oci_image({});
278330

279331
# https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md#oci-layout-file
280332
def validate_oci_layout_file:
281333
validate_IN(.imageLayoutVersion; "1.0.0")
282334
;
283-
284-
# TODO validate digest, size of blobs (*somewhere*, probably not here - this is all "cheap" validations / version+ordering+format assumption validations)
285-
# TODO if .data, validate that somehow too (size, digest); https://github.com/jqlang/jq/issues/1116#issuecomment-2515814615
286-
# TODO also we should validate that the length of every/any manifest is <= 4MiB (https://github.com/opencontainers/distribution-spec/pull/293#issuecomment-1452780554)

0 commit comments

Comments
 (0)