summaryrefslogtreecommitdiff
path: root/rsync/scoop.c
Unidiff
Diffstat (limited to 'rsync/scoop.c') (more/less context) (ignore whitespace changes)
-rw-r--r--rsync/scoop.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/rsync/scoop.c b/rsync/scoop.c
new file mode 100644
index 0000000..9f68a60
--- a/dev/null
+++ b/rsync/scoop.c
@@ -0,0 +1,271 @@
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * librsync -- the library for network deltas
4 * $Id$
5 *
6 * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public License
10 * as published by the Free Software Foundation; either version 2.1 of
11 * the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
23/*
24 * scoop.c -- This file deals with readahead from caller-supplied
25 * buffers.
26 *
27 * Many functions require a certain minimum amount of input to do their
28 * processing. For example, to calculate a strong checksum of a block
29 * we need at least a block of input.
30 *
31 * Since we put the buffers completely under the control of the caller,
32 * we can't count on ever getting this much data all in one go. We
33 * can't simply wait, because the caller might have a smaller buffer
34 * than we require and so we'll never get it. For the same reason we
35 * must always accept all the data we're given.
36 *
37 * So, stream input data that's required for readahead is put into a
38 * special buffer, from which the caller can then read. It's
39 * essentially like an internal pipe, which on any given read request
40 * may or may not be able to actually supply the data.
41 *
42 * As a future optimization, we might try to take data directly from the
43 * input buffer if there's already enough there.
44 */
45
46/*
47 * TODO: We probably know a maximum amount of data that can be scooped
48 * up, so we could just avoid dynamic allocation. However that can't
49 * be fixed at compile time, because when generating a delta it needs
50 * to be large enough to hold one full block. Perhaps we can set it
51 * up when the job is allocated?  It would be nice to avoid doing any
52 * memory allocation after startup, in the same way that bzlib does.
53 */
54
55
56 /*
57 | To walk on water you've gotta sink
58 | in the ice.
59 | -- Shihad, `The General Electric'.
60 */
61
62#include <config_rsync.h>
63
64#include <assert.h>
65#include <stdlib.h>
66#include <stdio.h>
67#include <string.h>
68
69#include "rsync.h"
70#include "job.h"
71#include "stream.h"
72#include "trace.h"
73#include "util.h"
74
75
76#if 0
77# undef rs_trace
78# define rs_trace(s...)
79#endif
80
81
82/**
83 * Try to accept a from the input buffer to get LEN bytes in the scoop.
84 */
85void rs_scoop_input(rs_job_t *job, size_t len)
86{
87 rs_buffers_t *stream = job->stream;
88 size_t tocopy;
89
90 assert(len > job->scoop_avail);
91
92 if (job->scoop_alloc < len) {
93 /* need to allocate a new buffer, too */
94 char *newbuf;
95 int newsize = 2 * len;
96 newbuf = rs_alloc(newsize, "scoop buffer");
97 if (job->scoop_avail)
98 memcpy(newbuf, job->scoop_next, job->scoop_avail);
99 if (job->scoop_buf)
100 free(job->scoop_buf);
101 job->scoop_buf = job->scoop_next = newbuf;
102 rs_trace("resized scoop buffer to %.0f bytes from %.0f",
103 (double) newsize, (double) job->scoop_alloc);
104 job->scoop_alloc = newsize;
105 } else {
106 /* this buffer size is fine, but move the existing
107 * data down to the front. */
108 memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
109 job->scoop_next = job->scoop_buf;
110 }
111
112 /* take as much input as is available, to give up to LEN bytes
113 * in the scoop. */
114 tocopy = len - job->scoop_avail;
115 if (tocopy > stream->avail_in)
116 tocopy = stream->avail_in;
117 assert(tocopy + job->scoop_avail <= job->scoop_alloc);
118
119 memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
120 rs_trace("accepted %.0f bytes from input to scoop", (double) tocopy);
121 job->scoop_avail += tocopy;
122 stream->next_in += tocopy;
123 stream->avail_in -= tocopy;
124}
125
126
127/**
128 * Advance the input cursor forward \p len bytes. This is used after
129 * doing readahead, when you decide you want to keep it. \p len must
130 * be no more than the amount of available data, so you can't cheat.
131 *
132 * So when creating a delta, we require one block of readahead. But
133 * after examining that block, we might decide to advance over all of
134 * it (if there is a match), or just one byte (if not).
135 */
136void rs_scoop_advance(rs_job_t *job, size_t len)
137{
138 rs_buffers_t *stream = job->stream;
139
140 /* It never makes sense to advance over a mixture of bytes from
141 * the scoop and input, because you couldn't possibly have looked
142 * at them all at the same time. */
143 if (job->scoop_avail) {
144 /* reading from the scoop buffer */
145 rs_trace("advance over %d bytes from scoop", len);
146 assert(len <= job->scoop_avail);
147 job->scoop_avail -= len;
148 job->scoop_next += len;
149 } else {
150 rs_trace("advance over %d bytes from input buffer", len);
151 assert(len <= stream->avail_in);
152 stream->avail_in -= len;
153 stream->next_in += len;
154 }
155}
156
157
158
159/**
160 * \brief Read from scoop without advancing.
161 *
162 * Ask for LEN bytes of input from the stream. If that much data is
163 * available, then return a pointer to it in PTR, advance the stream
164 * input pointer over the data, and return RS_DONE. If there's not
165 * enough data, then accept whatever is there into a buffer, advance
166 * over it, and return RS_BLOCKED.
167 *
168 * The data is not actually removed from the input, so this function
169 * lets you do readahead. If you want to keep any of the data, you
170 * should also call rs_scoop_advance() to skip over it.
171 */
172rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
173{
174 rs_buffers_t *stream = job->stream;
175 rs_job_check(job);
176
177 if (job->scoop_avail >= len) {
178 /* We have enough data queued to satisfy the request,
179 * so go straight from the scoop buffer. */
180 rs_trace("got %.0f bytes direct from scoop", (double) len);
181 *ptr = job->scoop_next;
182 return RS_DONE;
183 } else if (job->scoop_avail) {
184 /* We have some data in the scoop, but not enough to
185 * satisfy the request. */
186 rs_trace("data is present in the scoop and must be used");
187 rs_scoop_input(job, len);
188
189 if (job->scoop_avail < len) {
190 rs_trace("still have only %.0f bytes in scoop",
191 (double) job->scoop_avail);
192 return RS_BLOCKED;
193 } else {
194 rs_trace("scoop now has %.0f bytes, this is enough",
195 (double) job->scoop_avail);
196 *ptr = job->scoop_next;
197 return RS_DONE;
198 }
199 } else if (stream->avail_in >= len) {
200 /* There's enough data in the stream's input */
201 *ptr = stream->next_in;
202 rs_trace("got %.0f bytes from input buffer", (double) len);
203 return RS_DONE;
204 } else if (stream->avail_in > 0) {
205 /* Nothing was queued before, but we don't have enough
206 * data to satisfy the request. So queue what little
207 * we have, and try again next time. */
208 rs_trace("couldn't satisfy request for %.0f, scooping %.0f bytes",
209 (double) len, (double) job->scoop_avail);
210 rs_scoop_input(job, len);
211 return RS_BLOCKED;
212 } else if (stream->eof_in) {
213 /* Nothing is queued before, and nothing is in the input
214 * buffer at the moment. */
215 rs_trace("reached end of input stream");
216 return RS_INPUT_ENDED;
217 } else {
218 /* Nothing queued at the moment. */
219 rs_trace("blocked with no data in scoop or input buffer");
220 return RS_BLOCKED;
221 }
222}
223
224
225
226/**
227 * Read LEN bytes if possible, and remove them from the input scoop.
228 * If there's not enough data yet, return RS_BLOCKED.
229 *
230 * \param ptr will be updated to point to a read-only buffer holding
231 * the data, if enough is available.
232 *
233 * \return RS_DONE if all the data was available, RS_BLOCKED if it's
234 * not there.
235 */
236rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
237{
238 rs_result result;
239
240 result = rs_scoop_readahead(job, len, ptr);
241 if (result == RS_DONE)
242 rs_scoop_advance(job, len);
243
244 return result;
245}
246
247
248
249/*
250 * Read whatever remains in the input stream, assuming that it runs up
251 * to the end of the file. Set LEN appropriately.
252 */
253rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
254{
255 rs_buffers_t *stream = job->stream;
256
257 *len = job->scoop_avail + stream->avail_in;
258
259 return rs_scoop_read(job, *len, ptr);
260}
261
262
263
264/**
265 * Return the total number of bytes available including the scoop and input
266 * buffer.
267 */
268size_t rs_scoop_total_avail(rs_job_t *job)
269{
270 return job->scoop_avail + job->stream->avail_in;
271}