Skip to content

Commit 0efb401

Browse files
committed
Merge pull request #3487 from vtbassmatt/huge-file-smudge-clean
Teach Git to handle huge files in smudge/clean
2 parents 58e2ad2 + afd769c commit 0efb401

11 files changed

+88
-19
lines changed

convert.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,
613613

614614
struct filter_params {
615615
const char *src;
616-
unsigned long size;
616+
size_t size;
617617
int fd;
618618
const char *cmd;
619619
const char *path;

delta.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
9090
const unsigned char *top)
9191
{
9292
const unsigned char *data = *datap;
93-
unsigned long cmd, size = 0;
93+
size_t cmd, size = 0;
9494
int i = 0;
9595
do {
9696
cmd = *data++;
97-
size |= (cmd & 0x7f) << i;
97+
size |= st_left_shift(cmd & 0x7f, i);
9898
i += 7;
9999
} while (cmd & 0x80 && data < top);
100100
*datap = data;
101-
return size;
101+
return cast_size_t_to_ulong(size);
102102
}
103103

104104
#endif

entry.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode)
8282
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
8383
}
8484

85-
void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
85+
void *read_blob_entry(const struct cache_entry *ce, size_t *size)
8686
{
8787
enum object_type type;
88-
void *blob_data = read_object_file(&ce->oid, &type, size);
88+
unsigned long ul;
89+
void *blob_data = read_object_file(&ce->oid, &type, &ul);
8990

91+
*size = ul;
9092
if (blob_data) {
9193
if (type == OBJ_BLOB)
9294
return blob_data;
@@ -271,7 +273,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca
271273
int fd, ret, fstat_done = 0;
272274
char *new_blob;
273275
struct strbuf buf = STRBUF_INIT;
274-
unsigned long size;
276+
size_t size;
275277
ssize_t wrote;
276278
size_t newsize = 0;
277279
struct stat st;

entry.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts,
5252
*/
5353
void unlink_entry(const struct cache_entry *ce);
5454

55-
void *read_blob_entry(const struct cache_entry *ce, unsigned long *size);
55+
void *read_blob_entry(const struct cache_entry *ce, size_t *size);
5656
int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st);
5757
void update_ce_after_write(const struct checkout *state, struct cache_entry *ce,
5858
struct stat *st);

git-compat-util.h

+25
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@
113113
#define unsigned_mult_overflows(a, b) \
114114
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))
115115

116+
/*
 * Returns true if the left shift of "a" by "shift" bits will
 * overflow, i.e. if "a << shift" would lose set bits. The type of "a"
 * must be unsigned.
 *
 * A non-zero "a" shifted by the full width of its type (or more) always
 * overflows. Zero never overflows; note however that C leaves a shift
 * by the full width or more undefined even for a zero operand, so
 * callers must not evaluate "a << shift" in that case.
 *
 * The short-circuit on "(shift) >= bitsizeof(a)" also keeps the
 * right shift below from being evaluated with an out-of-range count.
 *
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 */
#define unsigned_left_shift_overflows(a, shift) \
	((a) && \
	 ((shift) >= bitsizeof(a) || \
	  (a) > maximum_unsigned_value_of_type(a) >> (shift)))
123+
116124
#ifdef __GNUC__
117125
#define TYPEOF(x) (__typeof__(x))
118126
#else
@@ -883,6 +891,23 @@ static inline size_t st_sub(size_t a, size_t b)
883891
return a - b;
884892
}
885893

894+
/*
 * Shift "a" left by "shift" bits, dying instead of silently losing
 * bits when the result does not fit in a size_t.
 *
 * Shifting by the width of size_t or more is undefined in C, so that
 * case is handled before any shift is evaluated: zero stays zero, and
 * any non-zero value is reported as an overflow.
 */
static inline size_t st_left_shift(size_t a, unsigned shift)
{
	/* 0 << n is 0 for every n; never evaluate an over-wide shift. */
	if (!a)
		return 0;
	if (shift >= bitsizeof(a) || unsigned_left_shift_overflows(a, shift))
		die("size_t overflow: %"PRIuMAX" << %u",
		    (uintmax_t)a, shift);
	return a << shift;
}
901+
902+
/*
 * Narrow a size_t to unsigned long. If the value does not survive the
 * conversion unchanged, die() and report both the full value and what
 * it would have been cut off to, rather than returning a truncated
 * size.
 */
static inline unsigned long cast_size_t_to_ulong(size_t a)
{
	const unsigned long narrowed = (unsigned long)a;

	if (narrowed != a)
		die("object too large to read on this platform: %"
		    PRIuMAX" is cut off to %lu",
		    (uintmax_t)a, narrowed);
	return narrowed;
}
910+
886911
#ifdef HAVE_ALLOCA_H
887912
# include <alloca.h>
888913
# define xalloca(size) (alloca(size))

object-file.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -1349,7 +1349,7 @@ static void *unpack_loose_rest(git_zstream *stream,
13491349
int parse_loose_header(const char *hdr, struct object_info *oi)
13501350
{
13511351
const char *type_buf = hdr;
1352-
unsigned long size;
1352+
size_t size;
13531353
int type, type_len = 0;
13541354

13551355
/*
@@ -1384,12 +1384,12 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
13841384
if (c > 9)
13851385
break;
13861386
hdr++;
1387-
size = size * 10 + c;
1387+
size = st_add(st_mult(size, 10), c);
13881388
}
13891389
}
13901390

13911391
if (oi->sizep)
1392-
*oi->sizep = size;
1392+
*oi->sizep = cast_size_t_to_ulong(size);
13931393

13941394
/*
13951395
* The length must be followed by a zero byte

packfile.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -1060,7 +1060,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
10601060
unsigned long len, enum object_type *type, unsigned long *sizep)
10611061
{
10621062
unsigned shift;
1063-
unsigned long size, c;
1063+
size_t size, c;
10641064
unsigned long used = 0;
10651065

10661066
c = buf[used++];
@@ -1074,10 +1074,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
10741074
break;
10751075
}
10761076
c = buf[used++];
1077-
size += (c & 0x7f) << shift;
1077+
size = st_add(size, st_left_shift(c & 0x7f, shift));
10781078
shift += 7;
10791079
}
1080-
*sizep = size;
1080+
*sizep = cast_size_t_to_ulong(size);
10811081
return used;
10821082
}
10831083

parallel-checkout.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
261261
struct stream_filter *filter;
262262
struct strbuf buf = STRBUF_INIT;
263263
char *blob;
264-
unsigned long size;
264+
size_t size;
265265
ssize_t wrote;
266266

267267
/* Sanity check */

t/helper/test-genzeros.c

+17-4
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,31 @@
33

44
int cmd__genzeros(int argc, const char **argv)
55
{
6-
long count;
6+
/* static, so that it is NUL-initialized */
7+
static const char zeros[256 * 1024];
8+
intmax_t count;
9+
ssize_t n;
710

811
if (argc > 2) {
912
fprintf(stderr, "usage: %s [<count>]\n", argv[0]);
1013
return 1;
1114
}
1215

13-
count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L;
16+
count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1;
1417

15-
while (count < 0 || count--) {
16-
if (putchar(0) == EOF)
18+
/* Writing out individual NUL bytes is slow... */
19+
while (count < 0)
20+
if (write(1, zeros, ARRAY_SIZE(zeros)) < 0)
1721
return -1;
22+
23+
while (count > 0) {
24+
n = write(1, zeros, count < ARRAY_SIZE(zeros) ?
25+
count : ARRAY_SIZE(zeros));
26+
27+
if (n < 0)
28+
return -1;
29+
30+
count -= n;
1831
}
1932

2033
return 0;

t/t1051-large-conversion.sh

+25
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,29 @@ test_expect_success 'ident converts on output' '
8383
test_cmp small.clean large.clean
8484
'
8585

86+
# This smudge filter prepends 5GB of zeros to the file it checks out. This
87+
# ensures that smudging doesn't mangle large files on 64-bit Windows.
88+
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
89+
'files over 4GB convert on output' '
90+
test_commit test small "a small file" &&
91+
test_config filter.makelarge.smudge \
92+
"test-tool genzeros $((5*1024*1024*1024)) && cat" &&
93+
echo "small filter=makelarge" >.gitattributes &&
94+
rm small &&
95+
git checkout -- small &&
96+
size=$(test_file_size small) &&
97+
test "$size" -ge $((5 * 1024 * 1024 * 1024))
98+
'
99+
100+
# This clean filter writes down the size of input it receives. By checking against
101+
# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows.
102+
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
103+
'files over 4GB convert on input' '
104+
test-tool genzeros $((5*1024*1024*1024)) >big &&
105+
test_config filter.checklarge.clean "wc -c >big.size" &&
106+
echo "big filter=checklarge" >.gitattributes &&
107+
git add big &&
108+
test $(test_file_size big) -eq $(cat big.size)
109+
'
110+
86111
test_done

t/test-lib.sh

+4
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,10 @@ build_option () {
17611761
sed -ne "s/^$1: //p"
17621762
}
17631763

1764+
test_lazy_prereq SIZE_T_IS_64BIT '
1765+
test 8 -eq "$(build_option sizeof-size_t)"
1766+
'
1767+
17641768
test_lazy_prereq LONG_IS_64BIT '
17651769
test 8 -le "$(build_option sizeof-long)"
17661770
'

0 commit comments

Comments
 (0)