summaryrefslogtreecommitdiff
path: root/rsync/search.c
Side-by-side diff
Diffstat (limited to 'rsync/search.c') (more/less context) (show whitespace changes)
-rw-r--r--rsync/search.c162
1 files changed, 162 insertions, 0 deletions
diff --git a/rsync/search.c b/rsync/search.c
new file mode 100644
index 0000000..3e0c5e2
--- a/dev/null
+++ b/rsync/search.c
@@ -0,0 +1,162 @@
+/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ *
+ * librsync -- the library for network deltas
+ * $Id$
+ *
+ * Copyright (C) 1999, 2000, 2001 by Martin Pool <mbp@samba.org>
+ * Copyright (C) 1999 by Andrew Tridgell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains code for searching the sumset for matching
+ * values.
+ */
+
+/*
+ * TODO: The common case is that the next block in both streams
+ * match. Can we make that a bit faster at all? We'd need to perhaps
+ * add a link forward between blocks in the sum_struct corresponding
+ * to the order they're found in the input; then before doing a search
+ * we can just check that pointer.
+ */
+
+#include <config_rsync.h>
+
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "rsync.h"
+#include "trace.h"
+#include "util.h"
+#include "sumset.h"
+#include "search.h"
+#include "checksum.h"
+
+
+#define TABLESIZE (1<<16)
+#define NULL_TAG (-1)
+
+
+#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
+#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
+
+
+static int
+rs_compare_targets(rs_target_t const *t1, rs_target_t const *t2)
+{
+ return ((int) t1->t - (int) t2->t);
+}
+
+
+rs_result
+rs_build_hash_table(rs_signature_t * sums)
+{
+ int i;
+
+ sums->tag_table = calloc(TABLESIZE, sizeof sums->tag_table[0]);
+ if (!sums->tag_table)
+ return RS_MEM_ERROR;
+
+ if (sums->count > 0) {
+ sums->targets = calloc(sums->count, sizeof(rs_target_t));
+ if (!sums->targets)
+ return RS_MEM_ERROR;
+
+ for (i = 0; i < sums->count; i++) {
+ sums->targets[i].i = i;
+ sums->targets[i].t = gettag(sums->block_sigs[i].weak_sum);
+ }
+
+ /* FIXME: Perhaps if this operating system has comparison_fn_t
+ * like GNU, then use it in the cast. But really does anyone
+ * care? */
+ qsort(sums->targets, sums->count,
+ sizeof(sums->targets[0]),
+ (int (*)(const void *, const void *)) rs_compare_targets);
+ }
+
+ for (i = 0; i < TABLESIZE; i++)
+ sums->tag_table[i] = NULL_TAG;
+
+ for (i = sums->count - 1; i >= 0; i--) {
+ sums->tag_table[sums->targets[i].t] = i;
+ }
+
+ rs_trace("done");
+ return RS_DONE;
+}
+
+
+
+/*
+ * See if there is a match for the specified block INBUF..BLOCK_LEN in
+ * the checksum set, using precalculated WEAK_SUM.
+ *
+ * If we don't find a match on the weak checksum, then we just give
+ * up. If we do find a weak match, then we proceed to calculate the
+ * strong checksum for the current block, and see if it will match
+ * anything.
+ */
+int
+rs_search_for_block(rs_weak_sum_t weak_sum,
+ char const *inbuf, size_t block_len,
+ rs_signature_t const *sig, rs_stats_t * stats,
+ rs_long_t * match_where)
+{
+ int hash_tag = gettag(weak_sum);
+ int j = sig->tag_table[hash_tag];
+ rs_strong_sum_t strong_sum;
+ int got_strong = 0;
+
+ if (j == NULL_TAG) {
+ return 0;
+ }
+
+ for (; j < sig->count && sig->targets[j].t == hash_tag; j++) {
+ int i = sig->targets[j].i;
+ int token;
+
+ if (weak_sum != sig->block_sigs[i].weak_sum)
+ continue;
+
+ token = sig->block_sigs[i].i;
+
+ rs_trace("found weak match for %08x in token %d", weak_sum, token);
+
+ if (!got_strong) {
+ rs_calc_strong_sum(inbuf, block_len, &strong_sum);
+ got_strong = 1;
+ }
+
+ /* FIXME: Use correct dynamic sum length! */
+ if (memcmp(strong_sum, sig->block_sigs[i].strong_sum,
+ sig->strong_sum_len) == 0) {
+ /* XXX: This is a remnant of rsync: token number 1 is the
+ * block at offset 0. It would be good to clear this
+ * up. */
+ *match_where = (token - 1) * sig->block_len;
+ return 1;
+ } else {
+ rs_trace("this was a false positive, the strong sig doesn't match");
+ stats->false_matches++;
+ }
+ }
+
+ return 0;
+}