# test_dataDistribution.py -- unit tests for xtreemfs_client.dataDistribution
1
import random
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import unittest

from xtreemfs_client import dataDistribution
from xtreemfs_client import osd
from xtreemfs_client import folder

# Building blocks for the ids and dict keys produced by the helper functions at the end of this file.
osd_id_prefix = 'osd_'  # leading part of every generated OSD id (see create_osd_id)
folder_id_prefix = 'folder_'  # leading part of every generated folder id (see create_test_folder_list)
osd_capacity_key = 'capacity'  # key under which create_osd_information stores an OSD's capacity


class TestDataDistribution(unittest.TestCase):
    def setUp(self):
        """Build a uuid -> {'capacity': 100} table for two freshly created OSDs.

        NOTE(review): the table only lives in a local variable and is dropped
        when this method returns, so no test can read it -- confirm whether it
        was meant to be stored on ``self``.
        """
        capacity_per_osd = 100
        key = 'capacity'
        table = {}
        for index in range(2):
            entry = osd.OSD('osd' + str(index))
            # one nested dict per OSD, keyed by that OSD's uuid
            table[entry.uuid] = {key: capacity_per_osd}

    def test_totally_random_distribution(self):
        """Random placement that ignores OSD capacities must vary between runs and may overfill.

        Generates 100 pairs of independently built random distributions and checks that
        (a) at least one pair differs in the total folder size of the first listed OSD, and
        (b) the largest per-OSD total seen anywhere exceeds the capacity value used
            for the OSDs (which the distribution was told to ignore).
        """
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_sizes = [20]

        def random_osd_loads():
            # Build one fresh random distribution and return its per-OSD total folder sizes.
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                     osd_information=create_osd_information(num_osds, osds_capacities),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=True)
            return [distribution.OSDs[osd_id].total_folder_size
                    for osd_id in distribution.get_osd_list()]

        max_osd_total_folder_size = 0
        felix_and_farouk_different = False

        for _ in range(100):
            loads_felix = random_osd_loads()
            loads_farouk = random_osd_loads()

            # only the first listed OSD is compared; one differing pair is enough
            if loads_felix[0] != loads_farouk[0]:
                felix_and_farouk_different = True

            max_osd_total_folder_size = max(max_osd_total_folder_size,
                                            max(loads_felix),
                                            max(loads_farouk))

        self.assertTrue(felix_and_farouk_different)
        self.assertGreater(max_osd_total_folder_size, osds_capacities[0])

    def test_value_error(self):
        """add_folders must raise ValueError when capacities are not ignored here.

        The fixture is a single OSD created with capacity label 0 and a single
        folder of size 1; no ``osd_information`` is passed to ``add_folders``.
        """
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(1, [0]))
        # Build the folder list outside the guarded call so that only a ValueError
        # coming from add_folders itself can satisfy the expectation.
        folders = create_test_folder_list(1, [1])

        with self.assertRaises(ValueError):
            distribution.add_folders(folders,
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False)

    def test_random_distribution_respecting_capacities(self):
        """Random placement that respects OSD capacities must never overfill an OSD.

        Generates 100 random distributions (10 folders of size 20 on 3 OSDs of
        capacity 100) and checks that the largest per-OSD total folder size seen
        in any of them does not exceed the capacity.
        """
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_size = [20]

        max_osd_total_folder_size = 0

        for _ in range(100):
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_size),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False,
                                     osd_information=create_osd_information(num_osds, osds_capacities),
                                     capacity=osd_capacity_key)

            total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                                  for osd_id in distribution.get_osd_list()]
            max_osd_total_folder_size = max(max(total_folder_sizes), max_osd_total_folder_size)

        self.assertLessEqual(max_osd_total_folder_size, osds_capacities[0])

    def test_random_round_robin_distribution(self):
        """Random placement with ``ignore_folder_sizes=True`` must vary between runs.

        Generates 100 pairs of distributions (10 folders of size 1 on 3 OSDs). For
        every distribution the largest and smallest per-OSD totals must differ
        (10 unit folders cannot be split evenly over 3 OSDs), and at least one
        pair must differ in the first folder id held by its first OSD.
        """
        num_osds = 3
        osd_capacities = [0]
        num_folders = 10
        folder_sizes = [1]

        def random_round_robin():
            # Build one fresh distribution with random assignment, ignoring folder sizes.
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                     random_osd_assignment=True,
                                     ignore_folder_sizes=True)
            return distribution

        def first_folder_id(distribution):
            # First folder key of the first OSD, in the containers' own iteration order.
            first_osd = list(distribution.OSDs.values())[0]
            return list(first_osd.folders.keys())[0]

        a_b_different = False

        for _ in range(100):
            distribution_a = random_round_robin()
            distribution_b = random_round_robin()

            for distribution in (distribution_a, distribution_b):
                total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                                      for osd_id in distribution.get_osd_list()]
                # Compare by value: the former `is not` check only worked because
                # CPython happens to cache small integers.
                self.assertNotEqual(max(total_folder_sizes), min(total_folder_sizes))

            if first_folder_id(distribution_a) != first_folder_id(distribution_b):
                a_b_different = True

        self.assertTrue(a_b_different)

    def test_lpt_distribution(self):
        """Check the default (non-random) placement of ``add_folders`` in three scenarios.

        1. OSDs all created with the same capacity label: every OSD ends up with
           the same total folder size.
        2. OSDs created with two capacity labels but no ``osd_information``:
           the largest total is exactly twice the smallest.
        3. OSDs with capacities 10 and 30 passed as ``osd_information``: some OSD
           stays empty and the fullest one holds a total of 2.
        """
        folder_sizes = [3, 7, 11]
        num_folders = 4
        num_osds = 4
        osd_capacities = [0]

        # test for equally-sized OSDs
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                              for osd_id in distribution.get_osd_list()]
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

        # test 1 for differently-sized OSDs
        osd_capacities = [10, 20]
        folder_sizes = [4, 4, 4]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
        total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                              for osd_id in distribution.get_osd_list()]
        self.assertEqual(2 * min(total_folder_sizes), max(total_folder_sizes))

        # test 2 for differently-sized OSDs. the expected result is that the 4 large OSDs receive 2 files each,
        # while the 4 small OSDs receive no files.
        osd_capacities = [10, 30]
        folder_sizes = [1]
        num_folders = 8
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 osd_information=create_osd_information(num_osds, osd_capacities),
                                 ratio_parameter=osd_capacity_key)
        total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                              for osd_id in distribution.get_osd_list()]

        self.assertEqual(0, min(total_folder_sizes))
        self.assertEqual(2, max(total_folder_sizes))

    def test_average_osd_load(self):
        """get_average_osd_load must equal total folder size divided by total OSD capacity.

        The fixture has 2 folders of each size in ``folder_sizes`` and 4 OSDs of
        each capacity in ``osd_capacities``.
        """
        folder_sizes = [49, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_capacities = [100, 150]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 create_osd_information(num_osds, osd_capacities),
                                 osd_capacity_key)

        expected = (sum(folder_sizes) * num_folders) / (sum(osd_capacities) * num_osds)
        actual = distribution.get_average_osd_load(create_osd_information(num_osds, osd_capacities),
                                                   osd_capacity_key)
        # The expected value is a float quotient; tolerate rounding differences
        # caused by a different order of operations in the implementation.
        self.assertAlmostEqual(expected, actual)

    def test_average_total_folder_size(self):
        """get_average_total_folder_size must equal total folder size divided by the OSD count.

        The fixture has 2 folders of each size in ``folder_sizes`` and
        ``num_osds * len(osd_capacities)`` = 8 OSDs.
        """
        folder_sizes = [49, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_capacities = [100, 150]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 create_osd_information(num_osds, osd_capacities),
                                 osd_capacity_key)

        expected = (sum(folder_sizes) * num_folders) / (num_osds * len(osd_capacities))
        # The expected value is a float quotient; tolerate rounding differences
        # caused by a different order of operations in the implementation.
        self.assertAlmostEqual(expected, distribution.get_average_total_folder_size())

    def test_rebalance_lpt(self):
        """rebalance_lpt must even out a randomly assigned distribution.

        8 folders of size 1 are placed randomly on 4 OSDs; after rebalancing,
        the smallest and largest per-OSD totals must be equal.
        """
        folder_sizes = [1]
        num_folders = 8
        osd_capacities = [10]
        num_osds = 4

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
        distribution.rebalance_lpt()

        total_folder_sizes = [distribution.OSDs[osd_id].total_folder_size
                              for osd_id in distribution.get_osd_list()]
        # we should obtain a perfectly balanced distribution
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

    def test_rebalance_one_folder(self):
        """Exercise rebalance_one_folder on a uniform and on a mixed-capacity OSD set.

        Both scenarios start from 8 folders of size 1 that were placed randomly.
        """
        unit_sizes = [1]
        folder_count = 8
        osd_count = 4

        # Scenario 1: four OSDs with one capacity label, rebalanced without extra arguments.
        uniform_capacities = [10]
        uniform = dataDistribution.DataDistribution()
        uniform.add_osd_list(create_test_osd_list(osd_count, uniform_capacities))
        uniform.add_folders(create_test_folder_list(folder_count, unit_sizes), random_osd_assignment=True)
        uniform.rebalance_one_folder()
        loads = [uniform.OSDs[osd_id].total_folder_size for osd_id in uniform.get_osd_list()]
        # the outcome has to be perfectly balanced
        self.assertEqual(min(loads), max(loads))

        # Scenario 2: capacities 10 and 30, handed to the rebalancer as osd_information.
        mixed_capacities = [10, 30]
        mixed = dataDistribution.DataDistribution()
        mixed.add_osd_list(create_test_osd_list(osd_count, mixed_capacities))
        mixed.add_folders(create_test_folder_list(folder_count, unit_sizes), random_osd_assignment=True)
        mixed.rebalance_one_folder(osd_information=create_osd_information(osd_count, mixed_capacities),
                                   capacity=osd_capacity_key)
        loads = [mixed.OSDs[osd_id].total_folder_size for osd_id in mixed.get_osd_list()]

        # every folder is expected to sit on one of the 'large' OSDs afterwards
        self.assertEqual(0, min(loads))
        self.assertEqual(2, max(loads))
253

254
def create_test_osd_list(num_osds, osd_capacities):
    """Return one OSD id per (index, capacity) combination.

    The list has ``num_osds * len(osd_capacities)`` entries, ordered by index
    first and by position in ``osd_capacities`` second. Ids are built by
    ``create_osd_id``.
    """
    return [create_osd_id(osd_capacity, index)
            for index in range(num_osds)
            for osd_capacity in osd_capacities]


262
def create_test_folder_list(num_folders, folder_sizes):
    """Return a shuffled list with ``num_folders`` Folder objects per entry of ``folder_sizes``.

    Each folder id is built from ``folder_id_prefix``, the folder size and the
    index; the third ``Folder`` constructor argument is passed as ``None``.

    NOTE(review): a size that occurs more than once in ``folder_sizes`` yields
    folders with identical ids -- confirm that callers relying on this
    (e.g. sizes ``[4, 4, 4]``) intend it.
    """
    test_folders = [
        folder.Folder(folder_id_prefix + "_" + str(folder_size) + "_" + str(index), folder_size, None)
        for index in range(num_folders)
        for folder_size in folder_sizes
    ]
    # randomize the order in which the folders are handed to the distribution
    random.shuffle(test_folders)
    return test_folders


272
def create_osd_information(num_osds, osd_capacities):
    """Return a dict mapping each generated OSD id to ``{osd_capacity_key: capacity}``.

    The ids are the same ones ``create_osd_id`` produces for every combination
    of an index in ``range(num_osds)`` and a capacity in ``osd_capacities``.
    """
    return {
        create_osd_id(osd_capacity, index): {osd_capacity_key: osd_capacity}
        for index in range(num_osds)
        for osd_capacity in osd_capacities
    }
280
281
282
283


def create_osd_id(capacity, index):
    """Compose a test OSD id from ``osd_id_prefix``, the capacity and the index, joined by underscores."""
    parts = [osd_id_prefix, str(capacity), str(index)]
    return "_".join(parts)