summaryrefslogtreecommitdiff
path: root/rsync/delta.c
Unidiff
Diffstat (limited to 'rsync/delta.c') (more/less context) (ignore whitespace changes)
-rw-r--r--rsync/delta.c2
1 files changed, 2 insertions, 0 deletions
diff --git a/rsync/delta.c b/rsync/delta.c
index 323c079..42f3afb 100644
--- a/rsync/delta.c
+++ b/rsync/delta.c
@@ -1,351 +1,353 @@
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 * 2 *
3 * librsync -- library for network deltas 3 * librsync -- library for network deltas
4 * $Id$ 4 * $Id$
5 * 5 *
6 * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org> 6 * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org>
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by 9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation; either version 2.1 of the License, or 10 * the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version. 11 * (at your option) any later version.
12 * 12 *
13 * This program is distributed in the hope that it will be useful, 13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU Lesser General Public License for more details. 16 * GNU Lesser General Public License for more details.
17 * 17 *
18 * You should have received a copy of the GNU Lesser General Public License 18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this program; if not, write to the Free Software 19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */ 21 */
22 22
23 /* 23 /*
24 | Let's climb to the TOP of that 24 | Let's climb to the TOP of that
25 | MOUNTAIN and think about STRIP 25 | MOUNTAIN and think about STRIP
26 | MINING!! 26 | MINING!!
27 */ 27 */
28 28
29 29
30/* 30/*
31 * delta.c -- Generate in streaming mode an rsync delta given a set of 31 * delta.c -- Generate in streaming mode an rsync delta given a set of
32 * signatures, and a new file. 32 * signatures, and a new file.
33 * 33 *
34 * The size of blocks for signature generation is determined by the 34 * The size of blocks for signature generation is determined by the
35 * block size in the incoming signature. 35 * block size in the incoming signature.
36 * 36 *
37 * To calculate a signature, we need to be able to see at least one 37 * To calculate a signature, we need to be able to see at least one
38 * block of the new file at a time. Once we have that, we calculate 38 * block of the new file at a time. Once we have that, we calculate
39 * its weak signature, and see if there is any block in the signature 39 * its weak signature, and see if there is any block in the signature
40 * hash table that has the same weak sum. If there is one, then we 40 * hash table that has the same weak sum. If there is one, then we
41 * also compute the strong sum of the new block, and cross check that. 41 * also compute the strong sum of the new block, and cross check that.
42 * If they're the same, then we can assume we have a match. 42 * If they're the same, then we can assume we have a match.
43 * 43 *
44 * The final block of the file has to be handled a little differently, 44 * The final block of the file has to be handled a little differently,
45 * because it may be a short match. Short blocks in the signature 45 * because it may be a short match. Short blocks in the signature
46 * don't include their length -- we just allow for the final short 46 * don't include their length -- we just allow for the final short
47 * block of the file to match any block in the signature, and if they 47 * block of the file to match any block in the signature, and if they
48 * have the same checksum we assume they must have the same length. 48 * have the same checksum we assume they must have the same length.
49 * Therefore, when we emit a COPY command, we have to send it with a 49 * Therefore, when we emit a COPY command, we have to send it with a
50 * length that is the same as the block matched, and not the block 50 * length that is the same as the block matched, and not the block
51 * length from the signature. 51 * length from the signature.
52 */ 52 */
53 53
54/* 54/*
55 * Profiling results as of v1.26, 2001-03-18: 55 * Profiling results as of v1.26, 2001-03-18:
56 * 56 *
57 * If everything matches, then we spend almost all our time in 57 * If everything matches, then we spend almost all our time in
58 * rs_mdfour64 and rs_weak_sum, which is unavoidable and therefore a 58 * rs_mdfour64 and rs_weak_sum, which is unavoidable and therefore a
59 * good profile. 59 * good profile.
60 * 60 *
61 * If nothing matches, it is not so good. 61 * If nothing matches, it is not so good.
62 */ 62 */
63 63
64 64
65#include <config_rsync.h> 65#include <config_rsync.h>
66 66
67#include <assert.h> 67#include <assert.h>
68#include <stdlib.h> 68#include <stdlib.h>
69#include <stdio.h> 69#include <stdio.h>
70 70
71#include "rsync.h" 71#include "rsync.h"
72#include "emit.h" 72#include "emit.h"
73#include "stream.h" 73#include "stream.h"
74#include "util.h" 74#include "util.h"
75#include "sumset.h" 75#include "sumset.h"
76#include "job.h" 76#include "job.h"
77#include "trace.h" 77#include "trace.h"
78#include "checksum.h" 78#include "checksum.h"
79#include "search.h" 79#include "search.h"
80#include "types.h" 80#include "types.h"
81 81
82 82
83/** 83/**
84 * Turn this on to make all rolling checksums be checked from scratch. 84 * Turn this on to make all rolling checksums be checked from scratch.
85 */ 85 */
86int rs_roll_paranoia = 0; 86int rs_roll_paranoia = 0;
87 87
88 88
89static rs_result rs_delta_scan(rs_job_t *, rs_long_t avail_len, void *); 89static rs_result rs_delta_scan(rs_job_t *, rs_long_t avail_len, void *);
90 90
91static rs_result rs_delta_s_deferred_copy(rs_job_t *job); 91static rs_result rs_delta_s_deferred_copy(rs_job_t *job);
92 92
93 93
94 94
95static rs_result rs_delta_s_end(rs_job_t *job) 95static rs_result rs_delta_s_end(rs_job_t *job)
96{ 96{
97 rs_emit_end_cmd(job); 97 rs_emit_end_cmd(job);
98 return RS_DONE; 98 return RS_DONE;
99} 99}
100 100
101 101
102/** 102/**
103 * \brief Get a block of data if possible, and see if it matches. 103 * \brief Get a block of data if possible, and see if it matches.
104 * 104 *
105 * On each call, we try to process all of the input data available on 105 * On each call, we try to process all of the input data available on
106 * the scoop and input buffer. 106 * the scoop and input buffer.
107 */ 107 */
108static rs_result 108static rs_result
109rs_delta_s_scan(rs_job_t *job) 109rs_delta_s_scan(rs_job_t *job)
110{ 110{
111 size_t this_len, avail_len; 111 size_t this_len, avail_len;
112 int is_ending; 112 int is_ending;
113 void *inptr; 113 void *inptr;
114 rs_result result; 114 rs_result result;
115 115
116 rs_job_check(job); 116 rs_job_check(job);
117 117
118 avail_len = rs_scoop_total_avail(job); 118 avail_len = rs_scoop_total_avail(job);
119 this_len = job->block_len; 119 this_len = job->block_len;
120 is_ending = job->stream->eof_in; 120 is_ending = job->stream->eof_in;
121 121
122 /* Now, we have avail_len bytes, and we need to scan through them 122 /* Now, we have avail_len bytes, and we need to scan through them
123 * looking for a match. We'll always end up emitting exactly one 123 * looking for a match. We'll always end up emitting exactly one
124 * command, either a literal or a copy, and after discovering that 124 * command, either a literal or a copy, and after discovering that
125 * we will skip over the appropriate number of bytes. */ 125 * we will skip over the appropriate number of bytes. */
126 if (avail_len == 0) { 126 if (avail_len == 0) {
127 if (is_ending) { 127 if (is_ending) {
128 /* no more delta to do */ 128 /* no more delta to do */
129 job->statefn = rs_delta_s_end; 129 job->statefn = rs_delta_s_end;
130 } 130 }
131 return RS_BLOCKED; 131 return RS_BLOCKED;
132 } 132 }
133 133
134 /* must read at least one block, or give up */ 134 /* must read at least one block, or give up */
135 if ((avail_len < job->block_len) && !is_ending) { 135 if ((avail_len < job->block_len) && !is_ending) {
136 /* we know we won't get it, but we have to try for a whole 136 /* we know we won't get it, but we have to try for a whole
137 * block anyhow so that it gets into the scoop. */ 137 * block anyhow so that it gets into the scoop. */
138 rs_scoop_input(job, job->block_len); 138 rs_scoop_input(job, job->block_len);
139 return RS_BLOCKED; 139 return RS_BLOCKED;
140 } 140 }
141 141
142 result = rs_scoop_readahead(job, avail_len, &inptr); 142 result = rs_scoop_readahead(job, avail_len, &inptr);
143 if (result != RS_DONE) 143 if (result != RS_DONE)
144 return result; 144 return result;
145 145
146 return rs_delta_scan(job, avail_len, inptr); 146 return rs_delta_scan(job, avail_len, inptr);
147} 147}
148 148
149 149
150 150
151/** 151/**
152 * Scan for a matching block in the next \p avail_len bytes of input. 152 * Scan for a matching block in the next \p avail_len bytes of input.
153 * 153 *
154 * If nonmatching data is found, then a LITERAL command will be put in 154 * If nonmatching data is found, then a LITERAL command will be put in
155 * the tube immediately. If matching data is found, then its position 155 * the tube immediately. If matching data is found, then its position
156 * will be saved in the job, and the job state set up to write out a 156 * will be saved in the job, and the job state set up to write out a
157 * COPY command after handling the literal. 157 * COPY command after handling the literal.
158 */ 158 */
159static rs_result 159static rs_result
160rs_delta_scan(rs_job_t *job, rs_long_t avail_len, void *p) 160rs_delta_scan(rs_job_t *job, rs_long_t avail_len, void *p)
161{ 161{
162 rs_long_t match_where; 162 rs_long_t match_where;
163 int search_pos, end_pos; 163 int search_pos, end_pos;
164 unsigned char *inptr = (unsigned char *) p; 164 unsigned char *inptr = (unsigned char *) p;
165 uint32_t s1 = job->weak_sig & 0xFFFF; 165 uint32_t s1 = job->weak_sig & 0xFFFF;
166 uint32_t s2 = job->weak_sig >> 16; 166 uint32_t s2 = job->weak_sig >> 16;
167 167
168 /* So, we have avail_len bytes of data, and we want to look 168 /* So, we have avail_len bytes of data, and we want to look
169 * through it for a match at some point. It's OK if it's not at 169 * through it for a match at some point. It's OK if it's not at
170 * the start of the available input data. If we're approaching 170 * the start of the available input data. If we're approaching
171 * the end and can't get a match, then we just block and get more 171 * the end and can't get a match, then we just block and get more
172 * later. */ 172 * later. */
173 173
174 /* FIXME: Perhaps we should be working in signed chars for the 174 /* FIXME: Perhaps we should be working in signed chars for the
175 * rolling sum? */ 175 * rolling sum? */
176 176
177 if (job->stream->eof_in) 177 if (job->stream->eof_in)
178 end_pos = avail_len - 1; 178 end_pos = avail_len - 1;
179 else 179 else
180 end_pos = avail_len - job->block_len; 180 end_pos = avail_len - job->block_len;
181 181
182 for (search_pos = 0; search_pos <= end_pos; search_pos++) { 182 for (search_pos = 0; search_pos <= end_pos; search_pos++) {
183 size_t this_len = job->block_len; 183 size_t this_len = job->block_len;
184 184
185 if (search_pos + this_len > avail_len) { 185 if (search_pos + this_len > avail_len) {
186 this_len = avail_len - search_pos; 186 this_len = avail_len - search_pos;
187 rs_trace("block reduced to %d", this_len); 187 rs_trace("block reduced to %d", this_len);
188 } else if (job->have_weak_sig) { 188 } else if (job->have_weak_sig) {
189 unsigned char a = inptr[search_pos + this_len - 1]; 189 unsigned char a = inptr[search_pos + this_len - 1];
190 /* roll in the newly added byte, if any */ 190 /* roll in the newly added byte, if any */
191 s1 += a + RS_CHAR_OFFSET; 191 s1 += a + RS_CHAR_OFFSET;
192 s2 += s1; 192 s2 += s1;
193 193
194 job->weak_sig = (s1 & 0xffff) | (s2 << 16); 194 job->weak_sig = (s1 & 0xffff) | (s2 << 16);
195 } 195 }
196 196
197 if (!job->have_weak_sig) { 197 if (!job->have_weak_sig) {
198 rs_trace("calculate weak sum from scratch"); 198 rs_trace("calculate weak sum from scratch");
199 job->weak_sig = rs_calc_weak_sum(inptr + search_pos, this_len); 199 job->weak_sig = rs_calc_weak_sum(inptr + search_pos, this_len);
200 s1 = job->weak_sig & 0xFFFF; 200 s1 = job->weak_sig & 0xFFFF;
201 s2 = job->weak_sig >> 16; 201 s2 = job->weak_sig >> 16;
202 job->have_weak_sig = 1; 202 job->have_weak_sig = 1;
203 } 203 }
204 204
205 if (rs_roll_paranoia) { 205 if (rs_roll_paranoia) {
206 rs_weak_sum_t verify = rs_calc_weak_sum(inptr + search_pos, this_len); 206 rs_weak_sum_t verify = rs_calc_weak_sum(inptr + search_pos, this_len);
207 if (verify != job->weak_sig) { 207 if (verify != job->weak_sig) {
208 rs_fatal("mismatch between rolled sum %#x and check %#x", 208 rs_fatal("mismatch between rolled sum %#x and check %#x",
209 job->weak_sig, verify); 209 job->weak_sig, verify);
210 } 210 }
211 } 211 }
212 212
213 if (rs_search_for_block(job->weak_sig, inptr + search_pos, this_len, 213 if (rs_search_for_block(job->weak_sig, inptr + search_pos, this_len,
214 job->signature, &job->stats, &match_where)) { 214 job->signature, &job->stats, &match_where)) {
215 /* So, we got a match. Cool. However, there may be 215 /* So, we got a match. Cool. However, there may be
216 * leading unmatched data that we need to flush. Thus we 216 * leading unmatched data that we need to flush. Thus we
217 * set our statefn to be rs_delta_s_deferred_copy so that 217 * set our statefn to be rs_delta_s_deferred_copy so that
218 * we can write out the command later. */ 218 * we can write out the command later. */
219 219
220 rs_trace("matched %.0f bytes at %.0f!", 220 rs_trace("matched %.0f bytes at %.0f!",
221 (double) this_len, (double) match_where); 221 (double) this_len, (double) match_where);
222 job->basis_pos = match_where; 222 job->basis_pos = match_where;
223 job->basis_len = this_len; 223 job->basis_len = this_len;
224 job->statefn = rs_delta_s_deferred_copy; 224 job->statefn = rs_delta_s_deferred_copy;
225 job->have_weak_sig = 0; 225 job->have_weak_sig = 0;
226 break; 226 break;
227 } else { 227 } else {
228 /* advance by one; roll out the byte we just moved over. */ 228 /* advance by one; roll out the byte we just moved over. */
229 unsigned char a = inptr[search_pos]; 229 unsigned char a = inptr[search_pos];
230 unsigned char shift = a + RS_CHAR_OFFSET; 230 unsigned char shift = a + RS_CHAR_OFFSET;
231 231
232 s1 -= shift; 232 s1 -= shift;
233 s2 -= this_len * shift; 233 s2 -= this_len * shift;
234 job->weak_sig = (s1 & 0xffff) | (s2 << 16); 234 job->weak_sig = (s1 & 0xffff) | (s2 << 16);
235 } 235 }
236 } 236 }
237 237
238 if (search_pos > 0) { 238 if (search_pos > 0) {
239 /* We may or may not have found a block, but we know we found 239 /* We may or may not have found a block, but we know we found
240 * some literal data at the start of the buffer. Therefore, 240 * some literal data at the start of the buffer. Therefore,
241 * we have to flush that out before we can continue on and 241 * we have to flush that out before we can continue on and
242 * emit the copy command or keep searching. */ 242 * emit the copy command or keep searching. */
243 243
244 /* FIXME: At the moment, if you call with very short buffers, 244 /* FIXME: At the moment, if you call with very short buffers,
245 * then you will get a series of very short LITERAL commands. 245 * then you will get a series of very short LITERAL commands.
246 * Perhaps this is what you deserve, or perhaps we should try 246 * Perhaps this is what you deserve, or perhaps we should try
247 * to get more readahead and avoid that. */ 247 * to get more readahead and avoid that. */
248 248
249 /* There's some literal data at the start of this window which 249 /* There's some literal data at the start of this window which
250 * we know is not in any block. */ 250 * we know is not in any block. */
251 rs_trace("got %d bytes of literal data", search_pos); 251 rs_trace("got %d bytes of literal data", search_pos);
252 rs_emit_literal_cmd(job, search_pos); 252 rs_emit_literal_cmd(job, search_pos);
253 rs_tube_copy(job, search_pos); 253 rs_tube_copy(job, search_pos);
254 } 254 }
255 255
256 return RS_RUNNING; 256 return RS_RUNNING;
257} 257}
258 258
259 259
260 260
261static rs_result rs_delta_s_deferred_copy(rs_job_t *job) 261static rs_result rs_delta_s_deferred_copy(rs_job_t *job)
262{ 262{
263 if (!job->basis_len) { 263 if (!job->basis_len) {
264 rs_log(RS_LOG_ERR, "somehow got zero basis_len"); 264 rs_log(RS_LOG_ERR, "somehow got zero basis_len");
265 return RS_INTERNAL_ERROR; 265 return RS_INTERNAL_ERROR;
266 } 266 }
267 267
268 rs_emit_copy_cmd(job, job->basis_pos, job->basis_len); 268 rs_emit_copy_cmd(job, job->basis_pos, job->basis_len);
269 rs_scoop_advance(job, job->basis_len); 269 rs_scoop_advance(job, job->basis_len);
270 270
271 job->statefn = rs_delta_s_scan; 271 job->statefn = rs_delta_s_scan;
272 272
273 return RS_RUNNING; 273 return RS_RUNNING;
274} 274}
275 275
276 276
277/** 277/**
278 * \brief State function that does a slack delta containing only 278 * \brief State function that does a slack delta containing only
279 * literal data to recreate the input. 279 * literal data to recreate the input.
280 */ 280 */
281static rs_result rs_delta_s_slack(rs_job_t *job) 281static rs_result rs_delta_s_slack(rs_job_t *job)
282{ 282{
283 rs_buffers_t * const stream = job->stream; 283 rs_buffers_t * const stream = job->stream;
284 size_t avail = stream->avail_in; 284 size_t avail = stream->avail_in;
285 285
286 if (avail) { 286 if (avail) {
287 rs_trace("emit slack delta for %.0f available bytes", (double) avail); 287 rs_trace("emit slack delta for %.0f available bytes", (double) avail);
288 rs_emit_literal_cmd(job, avail); 288 rs_emit_literal_cmd(job, avail);
289 rs_tube_copy(job, avail); 289 rs_tube_copy(job, avail);
290 return RS_RUNNING; 290 return RS_RUNNING;
291 } else { 291 } else {
292 if (rs_job_input_is_ending(job)) { 292 if (rs_job_input_is_ending(job)) {
293 job->statefn = rs_delta_s_end; 293 job->statefn = rs_delta_s_end;
294 return RS_RUNNING; 294 return RS_RUNNING;
295 } else { 295 } else {
296 return RS_BLOCKED; 296 return RS_BLOCKED;
297 } 297 }
298 } 298 }
299} 299}
300 300
301 301
302/** 302/**
303 * State function for writing out the header of the encoding job. 303 * State function for writing out the header of the encoding job.
304 */ 304 */
305static rs_result rs_delta_s_header(rs_job_t *job) 305static rs_result rs_delta_s_header(rs_job_t *job)
306{ 306{
307 rs_emit_delta_header(job); 307 rs_emit_delta_header(job);
308 308
309 if (job->block_len) { 309 if (job->block_len) {
310 if (!job->signature) { 310 if (!job->signature) {
311 rs_error("no signature is loaded into the job"); 311 rs_error("no signature is loaded into the job");
312 return RS_PARAM_ERROR; 312 return RS_PARAM_ERROR;
313 } 313 }
314 job->statefn = rs_delta_s_scan; 314 job->statefn = rs_delta_s_scan;
315 } else { 315 } else {
316 rs_trace("block length is zero for this delta; " 316 rs_trace("block length is zero for this delta; "
317 "therefore using slack deltas"); 317 "therefore using slack deltas");
318 job->statefn = rs_delta_s_slack; 318 job->statefn = rs_delta_s_slack;
319 } 319 }
320 320
321 return RS_RUNNING; 321 return RS_RUNNING;
322} 322}
323 323
324 324
325/** 325/**
326 * Prepare to compute a streaming delta. 326 * Prepare to compute a streaming delta.
327 */ 327 */
328rs_job_t *rs_delta_begin(rs_signature_t *sig) 328rs_job_t *rs_delta_begin(rs_signature_t *sig)
329{ 329{
330 rs_job_t *job; 330 rs_job_t *job;
331 331
332 job = rs_job_new("delta", rs_delta_s_header); 332 job = rs_job_new("delta", rs_delta_s_header);
333 job->signature = sig; 333 job->signature = sig;
334 334
335 if ((job->block_len = sig->block_len) < 0) { 335 if ((job->block_len = sig->block_len) < 0) {
336 rs_log(RS_LOG_ERR, "unreasonable block_len %d in signature", 336 rs_log(RS_LOG_ERR, "unreasonable block_len %d in signature",
337 job->block_len); 337 job->block_len);
338 rs_job_free(job);
338 return NULL; 339 return NULL;
339 } 340 }
340 341
341 job->strong_sum_len = sig->strong_sum_len; 342 job->strong_sum_len = sig->strong_sum_len;
342 if (job->strong_sum_len < 0 || job->strong_sum_len > RS_MD4_LENGTH) { 343 if (job->strong_sum_len < 0 || job->strong_sum_len > RS_MD4_LENGTH) {
343 rs_log(RS_LOG_ERR, "unreasonable strong_sum_len %d in signature", 344 rs_log(RS_LOG_ERR, "unreasonable strong_sum_len %d in signature",
344 job->strong_sum_len); 345 job->strong_sum_len);
346 rs_job_free(job);
345 return NULL; 347 return NULL;
346 } 348 }
347 349
348 return job; 350 return job;
349} 351}
350 352
351 353