Skip to content

Commit d1d46e3

Browse files
committed
Implement more TODOs in oci-validate code
I'm much happier with this interface now -- it can properly handle an image with attestations now, for example.
1 parent 2b04d9e commit d1d46e3

File tree

4 files changed

+181
-81
lines changed

4 files changed

+181
-81
lines changed

helpers/oci-import.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
9898
else . end
9999
100100
| .mediaType //= media_type_oci_index # TODO index normalize function? just force this to be set/valid instead?
101-
| validate_oci_index
101+
| validate_oci_index({ indexPlatformsOptional: true })
102102
| validate_length(.manifests; 1) # TODO allow upstream attestation in the future?
103103
104104
# purge maintainer-provided URLs / annotations (https://github.com/docker-library/bashbrew/blob/4e0ea8d8aba49d54daf22bd8415fabba65dc83ee/cmd/bashbrew/oci-builder.go#L146-L147)
@@ -123,7 +123,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
123123
$build
124124
| .source.arches[.build.arch].platform
125125
)
126-
# TODO .manifests[1].platform ?
127126
128127
# inject our build annotations
129128
| .manifests[0].annotations += (
@@ -136,8 +135,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp --tab '
136135
' "$file" | tee index.json.new
137136
mv -f index.json.new index.json
138137

139-
# TODO "crane validate" is definitely interesting here -- it essentially validates all the descriptors recursively, including diff_ids, but it only supports "remote" or "tarball" (which refers to the *old* "docker save" tarball format), so isn't useful here, but we need to do basically that exact work
140-
141138
# now that "index.json" represents the exact index we want to push, let's push it down into a blob and make a new appropriate "index.json" for "crane push"
142139
# TODO we probably want/need some "traverse/manipulate an OCI layout" helpers 😭
143140
mediaType="$(jq --raw-output '.mediaType' index.json)"
@@ -159,3 +156,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
159156
}
160157
| normalize_manifest
161158
' > index.json
159+
160+
# TODO move this further out
161+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-sbom.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ cd "$output"
3838
imageIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
3939
include "oci";
4040
include "validate";
41-
validate_oci_index
41+
validate_oci_index({ indexPlatformsOptional: true })
4242
| validate_length(.manifests; 1)
4343
| validate_IN(.manifests[0].mediaType; media_types_index)
4444
| .manifests[0].digest
@@ -91,7 +91,7 @@ done
9191
sbomIndex="$(jq -L"$BASHBREW_META_SCRIPTS" --raw-output '
9292
include "oci";
9393
include "validate";
94-
validate_oci_index
94+
validate_oci_index({ indexPlatformsOptional: true })
9595
| validate_length(.manifests; 1)
9696
| validate_IN(.manifests[0].mediaType; media_types_index)
9797
| .manifests[0].digest
@@ -146,3 +146,6 @@ jq -L"$BASHBREW_META_SCRIPTS" --null-input --tab '
146146
}
147147
| normalize_manifest
148148
' > index.json
149+
150+
# TODO move this further out
151+
"$BASHBREW_META_SCRIPTS/helpers/oci-validate.sh" .

helpers/oci-validate.sh

Lines changed: 101 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -Eeuo pipefail
33

4-
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, some media types, layer diff_ids, etc)
4+
# given an OCI image layout (https://github.com/opencontainers/image-spec/blob/v1.1.1/image-layout.md), verifies all descriptors as much as possible (digest matches content, size, media types, layer diff_ids, etc)
55

66
layout="$1"; shift
77

@@ -23,85 +23,131 @@ jq -L"$BASHBREW_META_SCRIPTS" --slurp '
2323
| empty
2424
' oci-layout
2525

26-
# TODO this is all rubbish; it needs more thought (the jq functions it invokes are pretty solid now though)
26+
# TODO (recursively?) validate subject descriptors in here somewhere 🤔
2727

2828
descriptor() {
2929
local file="$1"; shift # "blobs/sha256/xxx"
30-
echo "blob: $file"
31-
local digest="$1"; shift # "sha256:xxx"
32-
local size="$1"; shift # "123"
33-
local algo="${digest%%:*}" # sha256
34-
local hash="${digest#$algo:}" # xxx
35-
local diskSize
36-
[ "$algo" = 'sha256' ] # TODO error message
37-
diskSize="$(stat --dereference --format '%s' "$file")"
38-
[ "$size" = "$diskSize" ] # TODO error message
39-
"${algo}sum" <<<"$hash *$file" --check --quiet --strict -
30+
local desc; desc="$(cat)"
31+
local shell
32+
shell="$(jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
33+
include "validate";
34+
include "oci";
35+
validate_one
36+
| validate_oci_descriptor
37+
| (
38+
@sh "local algo=\(
39+
.digest
40+
| split(":")[0]
41+
| validate_IN(.; "sha256", "sha512") # TODO more algorithms? need more tools on the host
42+
)",
43+
44+
@sh "local data=\(
45+
if has("data") then
46+
.data
47+
else " " end # empty string is valid base64 (which we should validate), but spaces are not, so we can use a single space to detect "data not set"
48+
)",
49+
50+
empty
51+
)
52+
')"
53+
eval "$shell"
54+
local digest size dataDigest= dataSize=
55+
digest="$("${algo}sum" "$file" | cut -d' ' -f1)"
56+
digest="$algo:$digest"
57+
size="$(stat --dereference --format '%s' "$file")"
58+
if [ "$data" != ' ' ]; then
59+
dataDigest="$(base64 <<<"$data" -d | "${algo}sum" | cut -d' ' -f1)"
60+
dataDigest="$algo:$dataDigest"
61+
dataSize="$(base64 <<<"$data" -d | wc --bytes)"
62+
# TODO *technically* we could get clever here and pass `base64 -d` to something like `tee >(wc --bytes) >(dig="$(sha256sum | cut -d' ' -f1)" && echo "sha256:$dig" && false) > /dev/null` to avoid parsing the base64 twice, but then failure cases are less likely to be caught, so it's safer to simply redecode (and we can't decode into a variable because this might be binary data *and* bash will do newline munging in both directions)
63+
fi
64+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --arg digest "$digest" --arg size "$size" --arg dataDigest "$dataDigest" --arg dataSize "$dataSize" '
65+
include "validate";
66+
validate_one
67+
| validate_IN(.digest; $digest)
68+
| validate_IN(.size; $size | tonumber)
69+
| if has("data") then
70+
validate(.data;
71+
$digest == $dataDigest
72+
and $size == $dataSize
73+
; "(decoded) data has size \($dataSize) and digest \($dataDigest) (expected \($size) and \($digest))")
74+
else . end
75+
| empty
76+
'
4077
}
4178

42-
images() {
43-
echo "image: $*"
79+
# TODO validate config (diff_ids, history, platform - gotta carry *two* levels of descriptors for that, and decompress all the layers 🙊)
80+
# TODO validate provenance/SBOM layer contents?
81+
82+
image() {
83+
local file="$1"; shift
84+
echo "image: $file"
85+
local desc; desc="$(cat)"
86+
descriptor <<<"$desc" "$file"
4487
local shell
4588
shell="$(
46-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
89+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
4790
include "validate";
4891
include "oci";
49-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
50-
validate_length(.; $expected | tonumber)
51-
| map(validate_oci_image)
92+
validate_length(.; 2)
93+
| .[0] as $desc
94+
| .[1]
95+
| validate_oci_image({
96+
imageAttestation: IN($desc.annotations["vnd.docker.reference.type"]; "attestation-manifest"),
97+
})
98+
| if $desc then
99+
validate_IN(.mediaType; $desc.mediaType)
100+
| validate_IN(.artifactType; $desc.artifactType)
101+
else . end
52102
| (
53103
(
54-
.[].config, .[].layers[]
55-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
56-
# TODO data?
104+
.config, .layers[]
105+
| @sh "descriptor <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
57106
),
58107
59108
empty # trailing comma
60109
)
61-
' "$@"
110+
' /dev/stdin "$file"
62111
)"
63112
eval "$shell"
64113
}
65114

66-
# TODO pass descriptor values down so we can validate that they match (.mediaType, .artifactType, .platform across *two* levels index->manifest->config), similar to .data
67-
# TODO disallow urls completely?
68-
69-
indexes() {
70-
echo "index: $*"
115+
index() {
116+
local file="$1"; shift
117+
echo "index: $file"
118+
local desc; desc="$(cat)"
119+
if [ "$desc" != 'null' ]; then
120+
descriptor <<<"$desc" "$file"
121+
fi
71122
local shell
72123
shell="$(
73-
jq -L"$BASHBREW_META_SCRIPTS" --arg expected "$#" --slurp --raw-output '
124+
jq <<<"$desc" -L"$BASHBREW_META_SCRIPTS" --slurp --raw-output '
74125
include "validate";
75126
include "oci";
76-
# TODO technically, this would pass if one file is empty and another file has two documents in it (since it is counting the total), so that is not great, but probably is not a real problem
77-
validate_length(.; $expected | tonumber)
78-
| map(validate_oci_index)
127+
validate_length(.; 2)
128+
| .[0] as $desc
129+
| .[1]
130+
| validate_oci_index({
131+
indexPlatformsOptional: (input_filename == "index.json"),
132+
})
133+
| if $desc then
134+
validate_IN(.mediaType; $desc.mediaType)
135+
| validate_IN(.artifactType; $desc.artifactType)
136+
else . end
137+
| .manifests[]
79138
| (
80-
(
81-
.[].manifests[]
82-
| @sh "descriptor \("blobs/\(.digest | sub(":"; "/"))") \(.digest) \(.size)"
83-
# TODO data?
84-
),
85-
86-
(
87-
[ .[].manifests[] | select(IN(.mediaType; media_types_image)) | .digest ]
88-
| if length > 0 then
89-
"images \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
90-
else empty end
91-
),
92-
93-
(
94-
[ .[].manifests[] | select(IN(.mediaType; media_types_index)) | .digest ]
95-
| if length > 0 then
96-
"indexes \(map("blobs/\(sub(":"; "/"))" | @sh) | join(" "))"
97-
else empty end
98-
),
99-
100-
empty # trailing comma
101-
)
102-
' "$@"
139+
.mediaType
140+
| if IN(media_types_index) then
141+
"index"
142+
elif IN(media_types_image) then
143+
"image"
144+
else
145+
error("UNSUPPORTED MEDIA TYPE: \(.)")
146+
end
147+
) + @sh " <<<\(tojson) \(.digest | "blobs/\(sub(":"; "/"))")"
148+
' /dev/stdin "$file"
103149
)"
104150
eval "$shell"
105151
}
106152

107-
indexes index.json
153+
index <<<'null' index.json

0 commit comments

Comments
 (0)