-rw-r--r-- | rsync/search.c | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/rsync/search.c b/rsync/search.c new file mode 100644 index 0000000..3e0c5e2 --- a/dev/null +++ b/rsync/search.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- | ||
2 | * | ||
3 | * librsync -- the library for network deltas | ||
4 | * $Id$ | ||
5 | * | ||
6 | * Copyright (C) 1999, 2000, 2001 by Martin Pool <mbp@samba.org> | ||
7 | * Copyright (C) 1999 by Andrew Tridgell | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU Lesser General Public License as published by | ||
11 | * the Free Software Foundation; either version 2.1 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * This file contains code for searching the sumset for matching | ||
26 | * values. | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * TODO: The common case is that the next block in both streams | ||
31 | * match. Can we make that a bit faster at all? We'd need to perhaps | ||
32 | * add a link forward between blocks in the sum_struct corresponding | ||
33 | * to the order they're found in the input; then before doing a search | ||
34 | * we can just check that pointer. | ||
35 | */ | ||
36 | |||
37 | #include <config_rsync.h> | ||
38 | |||
39 | #include <string.h> | ||
40 | #include <assert.h> | ||
41 | #include <stdlib.h> | ||
42 | #include <stdio.h> | ||
43 | |||
44 | #include "rsync.h" | ||
45 | #include "trace.h" | ||
46 | #include "util.h" | ||
47 | #include "sumset.h" | ||
48 | #include "search.h" | ||
49 | #include "checksum.h" | ||
50 | |||
51 | |||
52 | #define TABLESIZE (1<<16) | ||
53 | #define NULL_TAG (-1) | ||
54 | |||
55 | |||
56 | #define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF) | ||
57 | #define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16) | ||
58 | |||
59 | |||
60 | static int | ||
61 | rs_compare_targets(rs_target_t const *t1, rs_target_t const *t2) | ||
62 | { | ||
63 | return ((int) t1->t - (int) t2->t); | ||
64 | } | ||
65 | |||
66 | |||
67 | rs_result | ||
68 | rs_build_hash_table(rs_signature_t * sums) | ||
69 | { | ||
70 | int i; | ||
71 | |||
72 | sums->tag_table = calloc(TABLESIZE, sizeof sums->tag_table[0]); | ||
73 | if (!sums->tag_table) | ||
74 | return RS_MEM_ERROR; | ||
75 | |||
76 | if (sums->count > 0) { | ||
77 | sums->targets = calloc(sums->count, sizeof(rs_target_t)); | ||
78 | if (!sums->targets) | ||
79 | return RS_MEM_ERROR; | ||
80 | |||
81 | for (i = 0; i < sums->count; i++) { | ||
82 | sums->targets[i].i = i; | ||
83 | sums->targets[i].t = gettag(sums->block_sigs[i].weak_sum); | ||
84 | } | ||
85 | |||
86 | /* FIXME: Perhaps if this operating system has comparison_fn_t | ||
87 | * like GNU, then use it in the cast. But really does anyone | ||
88 | * care? */ | ||
89 | qsort(sums->targets, sums->count, | ||
90 | sizeof(sums->targets[0]), | ||
91 | (int (*)(const void *, const void *)) rs_compare_targets); | ||
92 | } | ||
93 | |||
94 | for (i = 0; i < TABLESIZE; i++) | ||
95 | sums->tag_table[i] = NULL_TAG; | ||
96 | |||
97 | for (i = sums->count - 1; i >= 0; i--) { | ||
98 | sums->tag_table[sums->targets[i].t] = i; | ||
99 | } | ||
100 | |||
101 | rs_trace("done"); | ||
102 | return RS_DONE; | ||
103 | } | ||
104 | |||
105 | |||
106 | |||
107 | /* | ||
108 | * See if there is a match for the specified block INBUF..BLOCK_LEN in | ||
109 | * the checksum set, using precalculated WEAK_SUM. | ||
110 | * | ||
111 | * If we don't find a match on the weak checksum, then we just give | ||
112 | * up. If we do find a weak match, then we proceed to calculate the | ||
113 | * strong checksum for the current block, and see if it will match | ||
114 | * anything. | ||
115 | */ | ||
116 | int | ||
117 | rs_search_for_block(rs_weak_sum_t weak_sum, | ||
118 | char const *inbuf, size_t block_len, | ||
119 | rs_signature_t const *sig, rs_stats_t * stats, | ||
120 | rs_long_t * match_where) | ||
121 | { | ||
122 | int hash_tag = gettag(weak_sum); | ||
123 | int j = sig->tag_table[hash_tag]; | ||
124 | rs_strong_sum_t strong_sum; | ||
125 | int got_strong = 0; | ||
126 | |||
127 | if (j == NULL_TAG) { | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | for (; j < sig->count && sig->targets[j].t == hash_tag; j++) { | ||
132 | int i = sig->targets[j].i; | ||
133 | int token; | ||
134 | |||
135 | if (weak_sum != sig->block_sigs[i].weak_sum) | ||
136 | continue; | ||
137 | |||
138 | token = sig->block_sigs[i].i; | ||
139 | |||
140 | rs_trace("found weak match for %08x in token %d", weak_sum, token); | ||
141 | |||
142 | if (!got_strong) { | ||
143 | rs_calc_strong_sum(inbuf, block_len, &strong_sum); | ||
144 | got_strong = 1; | ||
145 | } | ||
146 | |||
147 | /* FIXME: Use correct dynamic sum length! */ | ||
148 | if (memcmp(strong_sum, sig->block_sigs[i].strong_sum, | ||
149 | sig->strong_sum_len) == 0) { | ||
150 | /* XXX: This is a remnant of rsync: token number 1 is the | ||
151 | * block at offset 0. It would be good to clear this | ||
152 | * up. */ | ||
153 | *match_where = (token - 1) * sig->block_len; | ||
154 | return 1; | ||
155 | } else { | ||
156 | rs_trace("this was a false positive, the strong sig doesn't match"); | ||
157 | stats->false_matches++; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | return 0; | ||
162 | } | ||