-rw-r--r-- | rsync/scoop.c | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/rsync/scoop.c b/rsync/scoop.c new file mode 100644 index 0000000..9f68a60 --- a/dev/null +++ b/rsync/scoop.c | |||
@@ -0,0 +1,271 @@ | |||
1 | /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- | ||
2 | * | ||
3 | * librsync -- the library for network deltas | ||
4 | * $Id$ | ||
5 | * | ||
6 | * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public License | ||
10 | * as published by the Free Software Foundation; either version 2.1 of | ||
11 | * the License, or (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but | ||
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * scoop.c -- This file deals with readahead from caller-supplied | ||
25 | * buffers. | ||
26 | * | ||
27 | * Many functions require a certain minimum amount of input to do their | ||
28 | * processing. For example, to calculate a strong checksum of a block | ||
29 | * we need at least a block of input. | ||
30 | * | ||
31 | * Since we put the buffers completely under the control of the caller, | ||
32 | * we can't count on ever getting this much data all in one go. We | ||
33 | * can't simply wait, because the caller might have a smaller buffer | ||
34 | * than we require and so we'll never get it. For the same reason we | ||
35 | * must always accept all the data we're given. | ||
36 | * | ||
37 | * So, stream input data that's required for readahead is put into a | ||
38 | * special buffer, from which the caller can then read. It's | ||
39 | * essentially like an internal pipe, which on any given read request | ||
40 | * may or may not be able to actually supply the data. | ||
41 | * | ||
42 | * As a future optimization, we might try to take data directly from the | ||
43 | * input buffer if there's already enough there. | ||
44 | */ | ||
45 | |||
46 | /* | ||
47 | * TODO: We probably know a maximum amount of data that can be scooped | ||
48 | * up, so we could just avoid dynamic allocation. However that can't | ||
49 | * be fixed at compile time, because when generating a delta it needs | ||
50 | * to be large enough to hold one full block. Perhaps we can set it | ||
51 | * up when the job is allocated? It would be kind of nice to not do | ||
52 | * any memory allocation after startup, as bzlib does this. | ||
53 | */ | ||
54 | |||
55 | |||
56 | /* | ||
57 | | To walk on water you've gotta sink | ||
58 | | in the ice. | ||
59 | | -- Shihad, `The General Electric'. | ||
60 | */ | ||
61 | |||
62 | #include <config_rsync.h> | ||
63 | |||
64 | #include <assert.h> | ||
65 | #include <stdlib.h> | ||
66 | #include <stdio.h> | ||
67 | #include <string.h> | ||
68 | |||
69 | #include "rsync.h" | ||
70 | #include "job.h" | ||
71 | #include "stream.h" | ||
72 | #include "trace.h" | ||
73 | #include "util.h" | ||
74 | |||
75 | |||
76 | #if 0 | ||
77 | # undef rs_trace | ||
78 | # define rs_trace(s...) | ||
79 | #endif | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Try to accept a from the input buffer to get LEN bytes in the scoop. | ||
84 | */ | ||
85 | void rs_scoop_input(rs_job_t *job, size_t len) | ||
86 | { | ||
87 | rs_buffers_t *stream = job->stream; | ||
88 | size_t tocopy; | ||
89 | |||
90 | assert(len > job->scoop_avail); | ||
91 | |||
92 | if (job->scoop_alloc < len) { | ||
93 | /* need to allocate a new buffer, too */ | ||
94 | char *newbuf; | ||
95 | int newsize = 2 * len; | ||
96 | newbuf = rs_alloc(newsize, "scoop buffer"); | ||
97 | if (job->scoop_avail) | ||
98 | memcpy(newbuf, job->scoop_next, job->scoop_avail); | ||
99 | if (job->scoop_buf) | ||
100 | free(job->scoop_buf); | ||
101 | job->scoop_buf = job->scoop_next = newbuf; | ||
102 | rs_trace("resized scoop buffer to %.0f bytes from %.0f", | ||
103 | (double) newsize, (double) job->scoop_alloc); | ||
104 | job->scoop_alloc = newsize; | ||
105 | } else { | ||
106 | /* this buffer size is fine, but move the existing | ||
107 | * data down to the front. */ | ||
108 | memmove(job->scoop_buf, job->scoop_next, job->scoop_avail); | ||
109 | job->scoop_next = job->scoop_buf; | ||
110 | } | ||
111 | |||
112 | /* take as much input as is available, to give up to LEN bytes | ||
113 | * in the scoop. */ | ||
114 | tocopy = len - job->scoop_avail; | ||
115 | if (tocopy > stream->avail_in) | ||
116 | tocopy = stream->avail_in; | ||
117 | assert(tocopy + job->scoop_avail <= job->scoop_alloc); | ||
118 | |||
119 | memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy); | ||
120 | rs_trace("accepted %.0f bytes from input to scoop", (double) tocopy); | ||
121 | job->scoop_avail += tocopy; | ||
122 | stream->next_in += tocopy; | ||
123 | stream->avail_in -= tocopy; | ||
124 | } | ||
125 | |||
126 | |||
127 | /** | ||
128 | * Advance the input cursor forward \p len bytes. This is used after | ||
129 | * doing readahead, when you decide you want to keep it. \p len must | ||
130 | * be no more than the amount of available data, so you can't cheat. | ||
131 | * | ||
132 | * So when creating a delta, we require one block of readahead. But | ||
133 | * after examining that block, we might decide to advance over all of | ||
134 | * it (if there is a match), or just one byte (if not). | ||
135 | */ | ||
136 | void rs_scoop_advance(rs_job_t *job, size_t len) | ||
137 | { | ||
138 | rs_buffers_t *stream = job->stream; | ||
139 | |||
140 | /* It never makes sense to advance over a mixture of bytes from | ||
141 | * the scoop and input, because you couldn't possibly have looked | ||
142 | * at them all at the same time. */ | ||
143 | if (job->scoop_avail) { | ||
144 | /* reading from the scoop buffer */ | ||
145 | rs_trace("advance over %d bytes from scoop", len); | ||
146 | assert(len <= job->scoop_avail); | ||
147 | job->scoop_avail -= len; | ||
148 | job->scoop_next += len; | ||
149 | } else { | ||
150 | rs_trace("advance over %d bytes from input buffer", len); | ||
151 | assert(len <= stream->avail_in); | ||
152 | stream->avail_in -= len; | ||
153 | stream->next_in += len; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | |||
158 | |||
159 | /** | ||
160 | * \brief Read from scoop without advancing. | ||
161 | * | ||
162 | * Ask for LEN bytes of input from the stream. If that much data is | ||
163 | * available, then return a pointer to it in PTR, advance the stream | ||
164 | * input pointer over the data, and return RS_DONE. If there's not | ||
165 | * enough data, then accept whatever is there into a buffer, advance | ||
166 | * over it, and return RS_BLOCKED. | ||
167 | * | ||
168 | * The data is not actually removed from the input, so this function | ||
169 | * lets you do readahead. If you want to keep any of the data, you | ||
170 | * should also call rs_scoop_advance() to skip over it. | ||
171 | */ | ||
172 | rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr) | ||
173 | { | ||
174 | rs_buffers_t *stream = job->stream; | ||
175 | rs_job_check(job); | ||
176 | |||
177 | if (job->scoop_avail >= len) { | ||
178 | /* We have enough data queued to satisfy the request, | ||
179 | * so go straight from the scoop buffer. */ | ||
180 | rs_trace("got %.0f bytes direct from scoop", (double) len); | ||
181 | *ptr = job->scoop_next; | ||
182 | return RS_DONE; | ||
183 | } else if (job->scoop_avail) { | ||
184 | /* We have some data in the scoop, but not enough to | ||
185 | * satisfy the request. */ | ||
186 | rs_trace("data is present in the scoop and must be used"); | ||
187 | rs_scoop_input(job, len); | ||
188 | |||
189 | if (job->scoop_avail < len) { | ||
190 | rs_trace("still have only %.0f bytes in scoop", | ||
191 | (double) job->scoop_avail); | ||
192 | return RS_BLOCKED; | ||
193 | } else { | ||
194 | rs_trace("scoop now has %.0f bytes, this is enough", | ||
195 | (double) job->scoop_avail); | ||
196 | *ptr = job->scoop_next; | ||
197 | return RS_DONE; | ||
198 | } | ||
199 | } else if (stream->avail_in >= len) { | ||
200 | /* There's enough data in the stream's input */ | ||
201 | *ptr = stream->next_in; | ||
202 | rs_trace("got %.0f bytes from input buffer", (double) len); | ||
203 | return RS_DONE; | ||
204 | } else if (stream->avail_in > 0) { | ||
205 | /* Nothing was queued before, but we don't have enough | ||
206 | * data to satisfy the request. So queue what little | ||
207 | * we have, and try again next time. */ | ||
208 | rs_trace("couldn't satisfy request for %.0f, scooping %.0f bytes", | ||
209 | (double) len, (double) job->scoop_avail); | ||
210 | rs_scoop_input(job, len); | ||
211 | return RS_BLOCKED; | ||
212 | } else if (stream->eof_in) { | ||
213 | /* Nothing is queued before, and nothing is in the input | ||
214 | * buffer at the moment. */ | ||
215 | rs_trace("reached end of input stream"); | ||
216 | return RS_INPUT_ENDED; | ||
217 | } else { | ||
218 | /* Nothing queued at the moment. */ | ||
219 | rs_trace("blocked with no data in scoop or input buffer"); | ||
220 | return RS_BLOCKED; | ||
221 | } | ||
222 | } | ||
223 | |||
224 | |||
225 | |||
226 | /** | ||
227 | * Read LEN bytes if possible, and remove them from the input scoop. | ||
228 | * If there's not enough data yet, return RS_BLOCKED. | ||
229 | * | ||
230 | * \param ptr will be updated to point to a read-only buffer holding | ||
231 | * the data, if enough is available. | ||
232 | * | ||
233 | * \return RS_DONE if all the data was available, RS_BLOCKED if it's | ||
234 | * not there. | ||
235 | */ | ||
236 | rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr) | ||
237 | { | ||
238 | rs_result result; | ||
239 | |||
240 | result = rs_scoop_readahead(job, len, ptr); | ||
241 | if (result == RS_DONE) | ||
242 | rs_scoop_advance(job, len); | ||
243 | |||
244 | return result; | ||
245 | } | ||
246 | |||
247 | |||
248 | |||
249 | /* | ||
250 | * Read whatever remains in the input stream, assuming that it runs up | ||
251 | * to the end of the file. Set LEN appropriately. | ||
252 | */ | ||
253 | rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr) | ||
254 | { | ||
255 | rs_buffers_t *stream = job->stream; | ||
256 | |||
257 | *len = job->scoop_avail + stream->avail_in; | ||
258 | |||
259 | return rs_scoop_read(job, *len, ptr); | ||
260 | } | ||
261 | |||
262 | |||
263 | |||
264 | /** | ||
265 | * Return the total number of bytes available including the scoop and input | ||
266 | * buffer. | ||
267 | */ | ||
268 | size_t rs_scoop_total_avail(rs_job_t *job) | ||
269 | { | ||
270 | return job->scoop_avail + job->stream->avail_in; | ||
271 | } | ||