test_dataDistribution.py 13.4 KB
Newer Older
1
import random
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import unittest

from xtreemfs_client import dataDistribution
from xtreemfs_client import osd
from xtreemfs_client import folder

# Prefix of the OSD ids built by create_osd_id(); that helper inserts a further
# "_" separator, so generated ids look like 'osd__<capacity>_<index>'.
osd_id_prefix = 'osd_'
# Prefix of the folder ids built by create_test_folder_list(), which likewise
# appends "_<size>_<index>".
folder_id_prefix = 'folder_'
# Key handed to add_folders() in test_average_total_folder_size.
osd_capacity_key = 'capacity'


class TestDataDistribution(unittest.TestCase):
    def setUp(self):
        osd_capacity = 100
        capacity_key = 'capacity'
        capacities = {}
        for i in range(0, 2):
            new_osd = osd.OSD('osd' + str(i))
            capacities[new_osd.uuid] = {}
            capacities[new_osd.uuid][capacity_key] = osd_capacity

23
24
        random.seed(9234)

25
26
27
28
29
30
    def test_totally_random_distribution(self):
        """Random placement that ignores OSD capacities must vary and may overload OSDs.

        Builds 100 pairs of independently randomized distributions
        (``random_osd_assignment=True``, ``ignore_osd_capacities=True``) and checks that

        * at least one pair differs in the load of its first OSD, and
        * at least one OSD ends up holding more data than the nominal capacity.
        """
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_sizes = [20]

        def random_distribution():
            # a fresh distribution with fresh OSD and folder lists on every call
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=True)
            return distribution

        def osd_loads(distribution):
            # total folder size per OSD, in the order reported by get_osd_list()
            return [distribution.OSDs[osd_id].total_folder_size
                    for osd_id in distribution.get_osd_list()]

        max_osd_total_folder_size = 0
        felix_and_farouk_different = False

        for _ in range(100):
            # both distributions are built before either one is inspected
            distribution_felix = random_distribution()
            distribution_farouk = random_distribution()

            loads_felix = osd_loads(distribution_felix)
            loads_farouk = osd_loads(distribution_farouk)

            if loads_felix[0] != loads_farouk[0]:
                felix_and_farouk_different = True

            max_osd_total_folder_size = max(max_osd_total_folder_size,
                                            max(loads_felix),
                                            max(loads_farouk))

        self.assertTrue(felix_and_farouk_different)
        # assertGreater reports both values when the check fails
        self.assertGreater(max_osd_total_folder_size, osds_capacities[0])
66
67

    def test_random_distribution_respecting_capacities(self):
        """Random placement with ``ignore_osd_capacities=False`` must never overload an OSD.

        Generates 100 random distributions with registered OSD capacities and checks
        that the largest per-OSD load ever observed stays within the capacity.
        """
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_size = [20]

        max_osd_total_folder_size = 0

        for _ in range(100):
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.set_osd_capacities(create_osd_information(num_osds, osds_capacities))

            distribution.add_folders(create_test_folder_list(num_folders, folder_size),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False)

            loads = [distribution.OSDs[osd_id].total_folder_size
                     for osd_id in distribution.get_osd_list()]
            max_osd_total_folder_size = max(max(loads), max_osd_total_folder_size)

        # assertLessEqual reports both values when the check fails
        self.assertLessEqual(max_osd_total_folder_size, osds_capacities[0])

    def test_random_round_robin_distribution(self):
        """Random placement with ``ignore_folder_sizes=True`` must vary between runs.

        Builds 100 pairs of distributions (10 unit-size folders on 3 OSDs) and checks that

        * within every distribution the largest and smallest OSD load differ, and
        * at least one pair differs in the first folder id stored on its first OSD.
        """
        num_osds = 3
        osd_capacities = [0]
        num_folders = 10
        folder_sizes = [1]

        def random_distribution():
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                     random_osd_assignment=True,
                                     ignore_folder_sizes=True)
            return distribution

        def osd_loads(distribution):
            return [distribution.OSDs[osd_id].total_folder_size
                    for osd_id in distribution.get_osd_list()]

        def first_folder_id(distribution):
            first_osd = list(distribution.OSDs.values())[0]
            return list(first_osd.folders.keys())[0]

        a_b_different = False

        for _ in range(100):
            distribution_a = random_distribution()
            distribution_b = random_distribution()

            # Compare loads by value. The former `is not` was an identity test
            # that only behaved like `!=` thanks to CPython's small-int cache.
            loads_a = osd_loads(distribution_a)
            self.assertNotEqual(max(loads_a), min(loads_a))

            loads_b = osd_loads(distribution_b)
            self.assertNotEqual(max(loads_b), min(loads_b))

            if first_folder_id(distribution_a) != first_folder_id(distribution_b):
                a_b_different = True

        self.assertTrue(a_b_different)
126
127

    def test_lpt_distribution(self):
        """Check the default ``add_folders`` placement on equal and unequal OSDs.

        Three scenarios are covered: identical OSDs (loads must be equal), OSDs with
        bandwidths 10 and 20 (the largest load must be twice the smallest), and OSDs
        with bandwidths 10 and 30 holding 8 unit folders (loads must range from 0 to 2).
        """
        def osd_loads(distribution):
            return [distribution.OSDs[osd_id].total_folder_size
                    for osd_id in distribution.get_osd_list()]

        folder_sizes = [3, 7, 11]
        num_folders = 4
        num_osds = 4
        osd_capacities = [0]

        # test for equally-sized OSDs
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = osd_loads(distribution)
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

        # test 1 for differently-sized OSDs
        osd_bandwidths_1 = [10, 20]
        # NOTE(review): six equal sizes make create_test_folder_list() emit six folders
        # with the same id per index -- confirm this is intended.
        folder_sizes = [4, 4, 4, 4, 4, 4]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_1))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_1))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = osd_loads(distribution)
        self.assertEqual(2 * min(total_folder_sizes), max(total_folder_sizes))

        # test 2 for differently-sized OSDs. the expected result is that the 4 large OSDs
        # receive 2 files each, while the 4 small OSDs receive no files.
        osd_bandwidths_2 = [10, 30]
        folder_sizes = [1]
        num_folders = 8

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_2))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_2))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = osd_loads(distribution)

        self.assertEqual(0, min(total_folder_sizes))
        self.assertEqual(2, max(total_folder_sizes))

171
172
    def test_average_osd_processing_time(self):
        """``get_average_processing_time()`` must report 3.05 for the fixture below.

        The fixture consists of 4 OSDs for each of the bandwidths 10 and 15, and
        2 folders for each of the sizes 48, 123, 1 and 7, placed with the default
        ``add_folders`` strategy.
        """
        folder_sizes = [48, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_bandwidths = [10, 15]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))

        average = 3.05
        # The expected value is a float, so compare with a tolerance: exact equality
        # would depend on the order in which the implementation sums the terms.
        self.assertAlmostEqual(average, distribution.get_average_processing_time())
185

186
187
188
189
190
191
192
193
194
195
196
197
    def test_average_total_folder_size(self):
        """``get_average_load()`` must equal the total folder size divided by the OSD count."""
        num_osds = 4
        num_folders = 2
        osd_capacities = [100, 150]
        folder_sizes = [49, 123, 1, 7]

        test_folders = create_test_folder_list(num_folders, folder_sizes)
        osd_information = create_osd_information(num_osds, osd_capacities)

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        # NOTE(review): the OSD information and key are passed positionally here, whereas
        # the other tests pass keyword flags -- confirm against add_folders' signature.
        distribution.add_folders(test_folders, osd_information, osd_capacity_key)

        # every size occurs num_folders times; every capacity occurs num_osds times
        total_size = sum(folder_sizes) * num_folders
        osd_count = num_osds * len(osd_capacities)
        average = total_size / osd_count
        self.assertEqual(average, distribution.get_average_load())
199

200
201
202
203
204
205
206
207
    def test_rebalance_lpt(self):
        """``rebalance_lpt()`` must turn a random placement of 8 unit folders into equal loads."""
        num_osds = 4
        osd_capacities = [10]
        num_folders = 8
        folder_sizes = [1]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.set_osd_capacities(create_osd_information(num_osds, osd_capacities))

        # start from a random placement, then rebalance it
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
        distribution.rebalance_lpt()

        loads = [distribution.OSDs[osd_id].total_folder_size
                 for osd_id in distribution.get_osd_list()]
        # a perfectly balanced distribution has identical smallest and largest loads
        self.assertEqual(min(loads), max(loads))

218
219
220
    def test_rebalance_one_folder(self):
        """``rebalance_one_folder()`` must even out a random placement.

        Scenario 1 uses identical OSDs and expects equal loads. Scenario 2 registers
        bandwidths 10 and 30 and expects loads ranging from 0 to 2.
        """
        num_osds = 4
        num_folders = 8
        folder_sizes = [1]

        # scenario 1: one OSD class, no bandwidths registered
        osd_bandwidths = [10]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)

        distribution.rebalance_one_folder()

        loads = [distribution.OSDs[osd_id].total_folder_size
                 for osd_id in distribution.get_osd_list()]
        # we should obtain a perfectly balanced distribution
        self.assertEqual(min(loads), max(loads))

        # scenario 2: two OSD classes with registered bandwidths
        osd_bandwidths = [10, 30]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)

        distribution.rebalance_one_folder()

        loads = [distribution.OSDs[osd_id].total_folder_size
                 for osd_id in distribution.get_osd_list()]
        # all folders should now be on the 'large' OSDs
        self.assertEqual(0, min(loads))
        self.assertEqual(2, max(loads))
251

252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
    def test_rebalance_two_steps(self):
        """``rebalance_two_steps()`` must yield equal loads for one and for two folder sizes."""
        num_osds = 4
        num_folders = 12
        osd_bandwidths = [10]

        def rebalanced_loads(folder_sizes):
            # place the folders randomly, rebalance, and report the per-OSD loads
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
            distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                     random_osd_assignment=True)
            distribution.rebalance_two_steps()
            return [distribution.OSDs[osd_id].total_folder_size
                    for osd_id in distribution.get_osd_list()]

        # 12 unit size folders on 4 OSD => each should have 3 files
        loads = rebalanced_loads([1])
        self.assertEqual(3, min(loads))
        self.assertEqual(3, max(loads))
        self.assertEqual(12, sum(loads))

        # same test with one more folder size
        loads = rebalanced_loads([1, 2])
        self.assertEqual(9, min(loads))
        self.assertEqual(9, max(loads))
        self.assertEqual(36, sum(loads))


289
def create_test_osd_list(num_osds, osd_capacities):
    """Return the ids of ``num_osds`` test OSDs for each entry of ``osd_capacities``.

    Ids are ordered by index first and, within one index, in the order of
    ``osd_capacities``.
    """
    return [create_osd_id(capacity, index)
            for index in range(num_osds)
            for capacity in osd_capacities]


297
def create_test_folder_list(num_folders, folder_sizes):
    """Return a shuffled list with ``num_folders`` folders for each entry of ``folder_sizes``.

    Each folder id is built from the module's folder prefix, the folder size and
    the running index. The list is shuffled in place with ``random.shuffle``.
    """
    shuffled_folders = [
        folder.Folder(folder_id_prefix + "_" + str(size) + "_" + str(index), size, None)
        for index in range(num_folders)
        for size in folder_sizes
    ]
    random.shuffle(shuffled_folders)
    return shuffled_folders


307
def create_osd_information(num_osds, osd_capacities):
    """Return a dict that maps each generated OSD id to the value it was generated from.

    The keys are the ids produced by ``create_osd_id`` for every index below
    ``num_osds`` combined with every entry of ``osd_capacities``.
    """
    return {create_osd_id(capacity, index): capacity
            for index in range(num_osds)
            for capacity in osd_capacities}
314
315
316
317


def create_osd_id(capacity, index):
    """Return the test OSD id built from the module's OSD prefix, ``capacity`` and ``index``."""
    id_parts = (osd_id_prefix, str(capacity), str(index))
    return "_".join(id_parts)