-rw-r--r-- | rsync/scoop.c | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/rsync/scoop.c b/rsync/scoop.c new file mode 100644 index 0000000..9f68a60 --- a/dev/null +++ b/rsync/scoop.c @@ -0,0 +1,271 @@ +/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- + * + * librsync -- the library for network deltas + * $Id$ + * + * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * scoop.c -- This file deals with readahead from caller-supplied + * buffers. + * + * Many functions require a certain minimum amount of input to do their + * processing. For example, to calculate a strong checksum of a block + * we need at least a block of input. + * + * Since we put the buffers completely under the control of the caller, + * we can't count on ever getting this much data all in one go. We + * can't simply wait, because the caller might have a smaller buffer + * than we require and so we'll never get it. For the same reason we + * must always accept all the data we're given. + * + * So, stream input data that's required for readahead is put into a + * special buffer, from which the caller can then read. It's + * essentially like an internal pipe, which on any given read request + * may or may not be able to actually supply the data. + * + * As a future optimization, we might try to take data directly from the + * input buffer if there's already enough there. + */ + +/* + * TODO: We probably know a maximum amount of data that can be scooped + * up, so we could just avoid dynamic allocation. However that can't + * be fixed at compile time, because when generating a delta it needs + * to be large enough to hold one full block. Perhaps we can set it + * up when the job is allocated? It would be kind of nice to not do + * any memory allocation after startup, as bzlib does this. + */ + + + /* + | To walk on water you've gotta sink + | in the ice. + | -- Shihad, `The General Electric'. + */ + +#include <config_rsync.h> + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "rsync.h" +#include "job.h" +#include "stream.h" +#include "trace.h" +#include "util.h" + + +#if 0 +# undef rs_trace +# define rs_trace(s...) +#endif + + +/** + * Try to accept a from the input buffer to get LEN bytes in the scoop. + */ +void rs_scoop_input(rs_job_t *job, size_t len) +{ + rs_buffers_t *stream = job->stream; + size_t tocopy; + + assert(len > job->scoop_avail); + + if (job->scoop_alloc < len) { + /* need to allocate a new buffer, too */ + char *newbuf; + int newsize = 2 * len; + newbuf = rs_alloc(newsize, "scoop buffer"); + if (job->scoop_avail) + memcpy(newbuf, job->scoop_next, job->scoop_avail); + if (job->scoop_buf) + free(job->scoop_buf); + job->scoop_buf = job->scoop_next = newbuf; + rs_trace("resized scoop buffer to %.0f bytes from %.0f", + (double) newsize, (double) job->scoop_alloc); + job->scoop_alloc = newsize; + } else { + /* this buffer size is fine, but move the existing + * data down to the front. */ + memmove(job->scoop_buf, job->scoop_next, job->scoop_avail); + job->scoop_next = job->scoop_buf; + } + + /* take as much input as is available, to give up to LEN bytes + * in the scoop. */ + tocopy = len - job->scoop_avail; + if (tocopy > stream->avail_in) + tocopy = stream->avail_in; + assert(tocopy + job->scoop_avail <= job->scoop_alloc); + + memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy); + rs_trace("accepted %.0f bytes from input to scoop", (double) tocopy); + job->scoop_avail += tocopy; + stream->next_in += tocopy; + stream->avail_in -= tocopy; +} + + +/** + * Advance the input cursor forward \p len bytes. This is used after + * doing readahead, when you decide you want to keep it. \p len must + * be no more than the amount of available data, so you can't cheat. + * + * So when creating a delta, we require one block of readahead. But + * after examining that block, we might decide to advance over all of + * it (if there is a match), or just one byte (if not). + */ +void rs_scoop_advance(rs_job_t *job, size_t len) +{ + rs_buffers_t *stream = job->stream; + + /* It never makes sense to advance over a mixture of bytes from + * the scoop and input, because you couldn't possibly have looked + * at them all at the same time. */ + if (job->scoop_avail) { + /* reading from the scoop buffer */ + rs_trace("advance over %d bytes from scoop", len); + assert(len <= job->scoop_avail); + job->scoop_avail -= len; + job->scoop_next += len; + } else { + rs_trace("advance over %d bytes from input buffer", len); + assert(len <= stream->avail_in); + stream->avail_in -= len; + stream->next_in += len; + } +} + + + +/** + * \brief Read from scoop without advancing. + * + * Ask for LEN bytes of input from the stream. If that much data is + * available, then return a pointer to it in PTR, advance the stream + * input pointer over the data, and return RS_DONE. If there's not + * enough data, then accept whatever is there into a buffer, advance + * over it, and return RS_BLOCKED. + * + * The data is not actually removed from the input, so this function + * lets you do readahead. If you want to keep any of the data, you + * should also call rs_scoop_advance() to skip over it. + */ +rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr) +{ + rs_buffers_t *stream = job->stream; + rs_job_check(job); + + if (job->scoop_avail >= len) { + /* We have enough data queued to satisfy the request, + * so go straight from the scoop buffer. */ + rs_trace("got %.0f bytes direct from scoop", (double) len); + *ptr = job->scoop_next; + return RS_DONE; + } else if (job->scoop_avail) { + /* We have some data in the scoop, but not enough to + * satisfy the request. */ + rs_trace("data is present in the scoop and must be used"); + rs_scoop_input(job, len); + + if (job->scoop_avail < len) { + rs_trace("still have only %.0f bytes in scoop", + (double) job->scoop_avail); + return RS_BLOCKED; + } else { + rs_trace("scoop now has %.0f bytes, this is enough", + (double) job->scoop_avail); + *ptr = job->scoop_next; + return RS_DONE; + } + } else if (stream->avail_in >= len) { + /* There's enough data in the stream's input */ + *ptr = stream->next_in; + rs_trace("got %.0f bytes from input buffer", (double) len); + return RS_DONE; + } else if (stream->avail_in > 0) { + /* Nothing was queued before, but we don't have enough + * data to satisfy the request. So queue what little + * we have, and try again next time. */ + rs_trace("couldn't satisfy request for %.0f, scooping %.0f bytes", + (double) len, (double) job->scoop_avail); + rs_scoop_input(job, len); + return RS_BLOCKED; + } else if (stream->eof_in) { + /* Nothing is queued before, and nothing is in the input + * buffer at the moment. */ + rs_trace("reached end of input stream"); + return RS_INPUT_ENDED; + } else { + /* Nothing queued at the moment. */ + rs_trace("blocked with no data in scoop or input buffer"); + return RS_BLOCKED; + } +} + + + +/** + * Read LEN bytes if possible, and remove them from the input scoop. + * If there's not enough data yet, return RS_BLOCKED. + * + * \param ptr will be updated to point to a read-only buffer holding + * the data, if enough is available. + * + * \return RS_DONE if all the data was available, RS_BLOCKED if it's + * not there. + */ +rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr) +{ + rs_result result; + + result = rs_scoop_readahead(job, len, ptr); + if (result == RS_DONE) + rs_scoop_advance(job, len); + + return result; +} + + + +/* + * Read whatever remains in the input stream, assuming that it runs up + * to the end of the file. Set LEN appropriately. + */ +rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr) +{ + rs_buffers_t *stream = job->stream; + + *len = job->scoop_avail + stream->avail_in; + + return rs_scoop_read(job, *len, ptr); +} + + + +/** + * Return the total number of bytes available including the scoop and input + * buffer. + */ +size_t rs_scoop_total_avail(rs_job_t *job) +{ + return job->scoop_avail + job->stream->avail_in; +} |