summaryrefslogtreecommitdiff
path: root/rsync/scoop.c
Side-by-side diff
Diffstat (limited to 'rsync/scoop.c') (more/less context) (ignore whitespace changes)
-rw-r--r--rsync/scoop.c271
1 files changed, 271 insertions, 0 deletions
diff --git a/rsync/scoop.c b/rsync/scoop.c
new file mode 100644
index 0000000..9f68a60
--- a/dev/null
+++ b/rsync/scoop.c
@@ -0,0 +1,271 @@
+/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ *
+ * librsync -- the library for network deltas
+ * $Id$
+ *
+ * Copyright (C) 2000, 2001 by Martin Pool <mbp@samba.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * scoop.c -- This file deals with readahead from caller-supplied
+ * buffers.
+ *
+ * Many functions require a certain minimum amount of input to do their
+ * processing. For example, to calculate a strong checksum of a block
+ * we need at least a block of input.
+ *
+ * Since we put the buffers completely under the control of the caller,
+ * we can't count on ever getting this much data all in one go. We
+ * can't simply wait, because the caller might have a smaller buffer
+ * than we require and so we'll never get it. For the same reason we
+ * must always accept all the data we're given.
+ *
+ * So, stream input data that's required for readahead is put into a
+ * special buffer, from which the caller can then read. It's
+ * essentially like an internal pipe, which on any given read request
+ * may or may not be able to actually supply the data.
+ *
+ * As a future optimization, we might try to take data directly from the
+ * input buffer if there's already enough there.
+ */
+
+/*
+ * TODO: We probably know a maximum amount of data that can be scooped
+ * up, so we could just avoid dynamic allocation. However that can't
+ * be fixed at compile time, because when generating a delta it needs
+ * to be large enough to hold one full block. Perhaps we can set it
+ * up when the job is allocated?  It would be kind of nice to not do
+ * any memory allocation after startup, the way bzlib avoids it.
+ */
+
+
+ /*
+ | To walk on water you've gotta sink
+ | in the ice.
+ | -- Shihad, `The General Electric'.
+ */
+
+#include <config_rsync.h>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "rsync.h"
+#include "job.h"
+#include "stream.h"
+#include "trace.h"
+#include "util.h"
+
+
+#if 0 /* change to 1 to turn this file's rs_trace() calls into nothing */
+# undef rs_trace
+# define rs_trace(s...) /* named variadic macro; expands to nothing */
+#endif
+
+
+/**
+ * Pull bytes from the caller's input buffer into the scoop, aiming for
+ * \p len bytes there (which must exceed what the scoop already holds).
+ * Only what the input has is copied, so the scoop may still fall short.
+ */
+void rs_scoop_input(rs_job_t *job, size_t len)
+{
+    rs_buffers_t *stream = job->stream;
+    size_t tocopy;
+
+    assert(len > job->scoop_avail);
+
+    if (job->scoop_alloc < len) {
+        /* Too small: allocate double the request and carry over any
+         * queued bytes.  The size is kept in a size_t because an int
+         * would truncate or overflow for a large LEN. */
+        size_t newsize = 2 * len;
+        char *newbuf = rs_alloc(newsize, "scoop buffer");
+        if (job->scoop_avail)
+            memcpy(newbuf, job->scoop_next, job->scoop_avail);
+        free(job->scoop_buf);       /* free(NULL) is a no-op */
+        job->scoop_buf = job->scoop_next = newbuf;
+        rs_trace("resized scoop buffer to %.0f bytes from %.0f",
+                 (double) newsize, (double) job->scoop_alloc);
+        job->scoop_alloc = newsize;
+    } else {
+        /* Big enough already; slide the queued data to the front. */
+        memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
+        job->scoop_next = job->scoop_buf;
+    }
+
+    /* Take as much input as is available, up to LEN bytes in total. */
+    tocopy = len - job->scoop_avail;
+    if (tocopy > stream->avail_in)
+        tocopy = stream->avail_in;
+    assert(tocopy + job->scoop_avail <= job->scoop_alloc);
+
+    memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
+    rs_trace("accepted %.0f bytes from input to scoop", (double) tocopy);
+    job->scoop_avail += tocopy;
+    stream->next_in += tocopy;
+    stream->avail_in -= tocopy;
+}
+
+
+/**
+ * Advance the input cursor forward \p len bytes.  This is used after
+ * doing readahead, when you decide you want to keep it.  \p len must
+ * be no more than the amount of available data, so you can't cheat.
+ *
+ * For example, delta generation reads ahead one block, then advances
+ * over all of it (if there is a match), or just one byte (if not).
+ */
+void rs_scoop_advance(rs_job_t *job, size_t len)
+{
+    rs_buffers_t *stream = job->stream;
+
+    /* It never makes sense to advance over a mixture of bytes from
+     * the scoop and input, because you couldn't possibly have looked
+     * at them all at the same time.  So consume from whichever one the
+     * readahead was served from: the scoop if it holds anything. */
+    if (job->scoop_avail) {
+        /* LEN is a size_t, so trace it via double; "%d" is a mismatch. */
+        rs_trace("advance over %.0f bytes from scoop", (double) len);
+        assert(len <= job->scoop_avail);
+        job->scoop_avail -= len;
+        job->scoop_next += len;
+    } else {
+        rs_trace("advance over %.0f bytes from input buffer", (double) len);
+        assert(len <= stream->avail_in);
+        stream->avail_in -= len;
+        stream->next_in += len;
+    }
+}
+
+
+
+/**
+ * \brief Read from scoop without advancing.
+ *
+ * Ask for \p len bytes of input.  If the scoop and the stream's input
+ * buffer together hold that much, store a pointer to the data in
+ * \p ptr and return RS_DONE, first copying input into the scoop when
+ * the scoop alone falls short.  If there is some data but not enough,
+ * move what the input has into the scoop and return RS_BLOCKED.  With
+ * no data at all, return RS_INPUT_ENDED if eof_in is set on the
+ * stream, otherwise RS_BLOCKED.  \p ptr is written only on RS_DONE.
+ *
+ * The returned data is not consumed, so this function lets you do
+ * readahead.  Call rs_scoop_advance() to skip over what you keep.
+ */
+rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
+{
+    rs_buffers_t *stream = job->stream;
+    rs_job_check(job);
+
+    if (job->scoop_avail >= len) {
+        /* We have enough data queued to satisfy the request,
+         * so go straight from the scoop buffer. */
+        rs_trace("got %.0f bytes direct from scoop", (double) len);
+        *ptr = job->scoop_next;
+        return RS_DONE;
+    } else if (job->scoop_avail) {
+        /* We have some data in the scoop, but not enough to
+         * satisfy the request. */
+        rs_trace("data is present in the scoop and must be used");
+        rs_scoop_input(job, len);
+
+        if (job->scoop_avail < len) {
+            rs_trace("still have only %.0f bytes in scoop",
+                     (double) job->scoop_avail);
+            return RS_BLOCKED;
+        } else {
+            rs_trace("scoop now has %.0f bytes, this is enough",
+                     (double) job->scoop_avail);
+            *ptr = job->scoop_next;
+            return RS_DONE;
+        }
+    } else if (stream->avail_in >= len) {
+        /* There's enough data in the stream's input */
+        *ptr = stream->next_in;
+        rs_trace("got %.0f bytes from input buffer", (double) len);
+        return RS_DONE;
+    } else if (stream->avail_in > 0) {
+        /* Nothing was queued before, but we don't have enough
+         * data to satisfy the request.  So queue what little
+         * we have, and try again next time.  The scoop is empty
+         * here, so the amount scooped is the input's avail_in. */
+        rs_trace("couldn't satisfy request for %.0f, scooping %.0f bytes",
+                 (double) len, (double) stream->avail_in);
+        rs_scoop_input(job, len);
+        return RS_BLOCKED;
+    } else if (stream->eof_in) {
+        /* No data anywhere, and the stream is flagged as ended. */
+        rs_trace("reached end of input stream");
+        return RS_INPUT_ENDED;
+    } else {
+        /* Nothing queued at the moment. */
+        rs_trace("blocked with no data in scoop or input buffer");
+        return RS_BLOCKED;
+    }
+}
+
+
+
+/**
+ * Consuming counterpart of rs_scoop_readahead(): fetch \p len bytes
+ * and, when they are all available, step past them as well.
+ *
+ * \param ptr set to point at the data when enough was available.
+ *
+ * \return RS_DONE when \p len bytes were obtained and advanced over;
+ * otherwise whatever rs_scoop_readahead() reported, without
+ * advancing.
+ */
+rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
+{
+    rs_result outcome = rs_scoop_readahead(job, len, ptr);
+
+    if (outcome != RS_DONE)
+        return outcome;
+
+    rs_scoop_advance(job, len);
+    return RS_DONE;
+}
+
+
+
+/*
+ * Read everything currently held in the scoop and the input buffer in
+ * one go, on the assumption that it runs up to the end of the file.
+ * The combined byte count is stored in *LEN.
+ */
+rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
+{
+    /* Publish the total first; the same value is the read request. */
+    *len = job->stream->avail_in + job->scoop_avail;
+
+    return rs_scoop_read(job, *len, ptr);
+}
+
+
+
+/**
+ * Total bytes readable now: the scoop plus the stream's input buffer.
+ */
+size_t rs_scoop_total_avail(rs_job_t *job)
+{
+    const rs_buffers_t *input = job->stream;
+    return input->avail_in + job->scoop_avail;
+}