# test_dataDistribution.py
import random
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import unittest

from xtreemfs_client import dataDistribution
from xtreemfs_client import osd
from xtreemfs_client import folder

osd_id_prefix = 'osd_'
folder_id_prefix = 'folder_'
osd_capacity_key = 'capacity'


class TestDataDistribution(unittest.TestCase):
    """Tests for dataDistribution.DataDistribution: folder placement, load averages and rebalancing."""

    def setUp(self):
        # Build a per-OSD capacity table for two OSDs.
        # NOTE(review): the table is a local that is discarded when setUp returns;
        # no test in this class reads it.
        osd_capacity = 100
        capacities = {}
        for i in range(2):
            new_osd = osd.OSD('osd' + str(i))
            capacities[new_osd.uuid] = {osd_capacity_key: osd_capacity}

    @staticmethod
    def _total_folder_sizes(distribution):
        """Return the total_folder_size of each OSD of `distribution`, in get_osd_list() order."""
        return [distribution.OSDs[osd_uuid].total_folder_size
                for osd_uuid in distribution.get_osd_list()]

    def test_totally_random_distribution(self):
        # generate some random distributions and check whether they are different
        max_osd_total_folder_size = 0
        felix_and_farouk_different = False

        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_sizes = [20]

        for _ in range(100):
            distribution_felix = dataDistribution.DataDistribution()
            distribution_felix.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution_felix.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                           random_osd_assignment=True,
                                           ignore_osd_capacities=True)

            distribution_farouk = dataDistribution.DataDistribution()
            distribution_farouk.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution_farouk.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                            random_osd_assignment=True,
                                            ignore_osd_capacities=True)

            osds_felix_total_folder_sizes = self._total_folder_sizes(distribution_felix)
            osds_farouk_total_folder_sizes = self._total_folder_sizes(distribution_farouk)

            # two independently generated distributions should differ at least once in 100 rounds
            if osds_felix_total_folder_sizes[0] != osds_farouk_total_folder_sizes[0]:
                felix_and_farouk_different = True

            max_osd_total_folder_size = max(max_osd_total_folder_size,
                                            max(osds_felix_total_folder_sizes),
                                            max(osds_farouk_total_folder_sizes))

        self.assertTrue(felix_and_farouk_different)
        # capacities are ignored here, so at least one OSD should have been loaded beyond 100
        self.assertGreater(max_osd_total_folder_size, osds_capacities[0])

    def test_random_distribution_respecting_capacities(self):
        # generate some random distributions and check whether they all respect the OSD capacities
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_size = [20]

        max_osd_total_folder_size = 0

        for _ in range(100):
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.set_osd_capacities(create_osd_information(num_osds, osds_capacities))

            distribution.add_folders(create_test_folder_list(num_folders, folder_size),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False)

            total_folder_sizes = self._total_folder_sizes(distribution)
            max_osd_total_folder_size = max(max(total_folder_sizes), max_osd_total_folder_size)

        self.assertLessEqual(max_osd_total_folder_size, osds_capacities[0])

    def test_random_round_robin_distribution(self):
        # generate some random distributions
        # and check whether OSDs are almost-equally loaded and whether they are different
        # NOTE(review): the per-distribution assertions only check max != min, not that the
        # spread is small; confirm whether a bound such as max - min <= 1 was intended.
        num_osds = 3
        osd_capacities = [0]
        num_folders = 10
        folder_sizes = [1]

        a_b_different = False

        for _ in range(100):
            distribution_a = dataDistribution.DataDistribution()
            distribution_a.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution_a.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                       random_osd_assignment=True,
                                       ignore_folder_sizes=True)
            distribution_b = dataDistribution.DataDistribution()
            distribution_b.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution_b.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                       random_osd_assignment=True,
                                       ignore_folder_sizes=True)

            # compare by value: identity comparison ('is not') of numbers is implementation-dependent
            total_folder_sizes_a = self._total_folder_sizes(distribution_a)
            self.assertNotEqual(max(total_folder_sizes_a), min(total_folder_sizes_a))

            total_folder_sizes_b = self._total_folder_sizes(distribution_b)
            self.assertNotEqual(max(total_folder_sizes_b), min(total_folder_sizes_b))

            # compare the first folder id held by the first OSD of each distribution
            first_folder_a = list(list(distribution_a.OSDs.values())[0].folders.keys())[0]
            first_folder_b = list(list(distribution_b.OSDs.values())[0].folders.keys())[0]
            if first_folder_a != first_folder_b:
                a_b_different = True

        self.assertTrue(a_b_different)

    def test_lpt_distribution(self):
        folder_sizes = [3, 7, 11]
        num_folders = 4
        num_osds = 4
        osd_capacities = [0]

        # test for equally-sized OSDs
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = self._total_folder_sizes(distribution)
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

        # test 1 for differently-sized OSDs
        osd_bandwidths_1 = [10, 20]
        folder_sizes = [4, 4, 4, 4, 4, 4]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_1))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_1))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = self._total_folder_sizes(distribution)
        # the most loaded OSD is expected to carry exactly twice the load of the least loaded one
        self.assertEqual(2 * min(total_folder_sizes), max(total_folder_sizes))

        # test 2 for differently-sized OSDs. the expected result is that the 4 large OSDs receive 2 files each,
        # while the 4 small OSDs receive no files.
        osd_bandwidths_2 = [10, 30]
        folder_sizes = [1]
        num_folders = 8

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_2))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_2))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = self._total_folder_sizes(distribution)

        self.assertEqual(0, min(total_folder_sizes))
        self.assertEqual(2, max(total_folder_sizes))

    def test_average_osd_processing_time(self):
        folder_sizes = [48, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_bandwidths = [10, 15]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))

        average = 3.05
        # the expected value is a non-representable decimal float, so compare with a tolerance
        self.assertAlmostEqual(average, distribution.get_average_processing_time())

    def test_average_total_folder_size(self):
        folder_sizes = [49, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_capacities = [100, 150]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        # NOTE(review): every other test passes only keyword flags to add_folders; these two
        # extra positional arguments should be checked against the current add_folders signature.
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 create_osd_information(num_osds, osd_capacities),
                                 osd_capacity_key)

        # total size of all created folders divided by the number of created OSDs
        average = (sum(folder_sizes) * num_folders) / (num_osds * len(osd_capacities))
        self.assertEqual(average, distribution.get_average_load())

    def test_rebalance_lpt(self):
        folder_sizes = [1]
        num_folders = 8
        osd_capacities = [10]
        num_osds = 4

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.set_osd_capacities(create_osd_information(num_osds, osd_capacities))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
        distribution.rebalance_lpt()

        total_folder_sizes = self._total_folder_sizes(distribution)
        # we should obtain a perfectly balanced distribution
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

    def test_rebalance_one_folder(self):
        folder_sizes = [1]
        num_folders = 8
        osd_bandwidths = [10]
        num_osds = 4

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)

        distribution.rebalance_one_folder()

        total_folder_sizes = self._total_folder_sizes(distribution)
        # we should obtain a perfectly balanced distribution
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

        osd_bandwidths = [10, 30]
        folder_sizes = [1]
        num_folders = 8
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
        distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))

        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)

        distribution.rebalance_one_folder()
        total_folder_sizes = self._total_folder_sizes(distribution)

        # all folders should now be on the 'large' OSDs
        self.assertEqual(0, min(total_folder_sizes))
        self.assertEqual(2, max(total_folder_sizes))
249

250
def create_test_osd_list(num_osds, osd_capacities):
    """Return OSD ids for every (index, capacity) pair.

    For each index in range(num_osds), one id is generated per entry of
    osd_capacities via create_osd_id, so the result holds
    num_osds * len(osd_capacities) ids, grouped by index.
    """
    return [
        create_osd_id(capacity, index)
        for index in range(num_osds)
        for capacity in osd_capacities
    ]


258
def create_test_folder_list(num_folders, folder_sizes):
    """Return a shuffled list of folder.Folder objects.

    For each index in range(num_folders), one folder is created per entry of
    folder_sizes. The folder id is folder_id_prefix, the size and the index
    joined by underscores; the size is passed as the second constructor
    argument and None as the third. The list is shuffled in place with
    random.shuffle before it is returned.
    """
    shuffled_folders = [
        folder.Folder("_".join((folder_id_prefix, str(size), str(index))), size, None)
        for index in range(num_folders)
        for size in folder_sizes
    ]
    random.shuffle(shuffled_folders)
    return shuffled_folders


268
def create_osd_information(num_osds, osd_capacities):
    """Return a dict mapping each generated OSD id to the value it was generated from.

    The keys are the ids that create_osd_id produces for every index in
    range(num_osds) combined with every entry of osd_capacities; each key maps
    to that entry of osd_capacities.
    """
    return {
        create_osd_id(capacity, index): capacity
        for index in range(num_osds)
        for capacity in osd_capacities
    }
275
276
277
278


def create_osd_id(capacity, index):
    """Return the OSD id: osd_id_prefix, capacity and index joined by underscores."""
    id_parts = (osd_id_prefix, str(capacity), str(index))
    return "_".join(id_parts)