# test_dataDistribution.py
# Unit tests for xtreemfs_client.dataDistribution.
import unittest

from xtreemfs_client import dataDistribution
from xtreemfs_client import osd
from xtreemfs_client import folder

osd_id_prefix = 'osd_'
folder_id_prefix = 'folder_'
osd_capacity_key = 'capacity'


class TestDataDistribution(unittest.TestCase):
    """Unit tests for ``dataDistribution.DataDistribution``.

    Every test builds its OSD ids, folders and OSD information through the module-level
    ``create_test_osd_list`` / ``create_test_folder_list`` / ``create_osd_information`` helpers.
    """

    def setUp(self):
        """Build a ``{osd uuid: {capacity key: capacity}}`` table for two ``osd.OSD`` objects.

        NOTE(review): none of the tests in this class read this table; it is kept on
        ``self.capacities`` so that the fixture is at least reachable instead of being discarded.
        """
        osd_capacity = 100
        self.capacities = {}
        for i in range(0, 2):
            new_osd = osd.OSD('osd' + str(i))
            self.capacities[new_osd.uuid] = {osd_capacity_key: osd_capacity}

    @staticmethod
    def _total_folder_sizes(distribution):
        """Return ``total_folder_size`` of each OSD, in ``distribution.get_osd_list()`` order."""
        return [distribution.OSDs[osd_id].total_folder_size
                for osd_id in distribution.get_osd_list()]

    def test_totally_random_distribution(self):
        """Random assignment that ignores OSD capacities varies between runs and overfills OSDs.

        Over 100 rounds, two independently built distributions must differ at least once in the
        load of their first OSD, and at least one OSD must hold more than ``osds_capacities[0]``.
        """
        max_osd_total_folder_size = 0
        felix_and_farouk_different = False

        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_sizes = [20]

        for _ in range(100):
            distribution_felix = dataDistribution.DataDistribution()
            distribution_felix.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution_felix.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                           osd_information=create_osd_information(num_osds, osds_capacities),
                                           random_osd_assignment=True,
                                           ignore_osd_capacities=True)

            distribution_farouk = dataDistribution.DataDistribution()
            distribution_farouk.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution_farouk.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                            osd_information=create_osd_information(num_osds, osds_capacities),
                                            random_osd_assignment=True,
                                            ignore_osd_capacities=True)

            sizes_felix = self._total_folder_sizes(distribution_felix)
            sizes_farouk = self._total_folder_sizes(distribution_farouk)

            if sizes_felix[0] != sizes_farouk[0]:
                felix_and_farouk_different = True

            max_osd_total_folder_size = max(max_osd_total_folder_size,
                                            max(sizes_felix),
                                            max(sizes_farouk))

        self.assertTrue(felix_and_farouk_different)
        self.assertGreater(max_osd_total_folder_size, osds_capacities[0])

    def test_value_error(self):
        """``add_folders`` must raise ``ValueError`` when capacities are not ignored.

        One test OSD (created for capacity 0) receives one folder of size 1 with
        ``ignore_osd_capacities=False``.
        """
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(1, [0]))
        with self.assertRaises(ValueError, msg="expect value error!"):
            distribution.add_folders(create_test_folder_list(1, [1]),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False)

    def test_random_distribution_respecting_capacities(self):
        """Random assignment with ``ignore_osd_capacities=False`` never exceeds the capacity.

        Over 100 rounds, the largest per-OSD total folder size seen must stay at or below
        ``osds_capacities[0]``.
        """
        num_osds = 3
        osds_capacities = [100]
        num_folders = 10
        folder_size = [20]

        max_osd_total_folder_size = 0

        for _ in range(100):
            distribution = dataDistribution.DataDistribution()
            distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
            distribution.add_folders(create_test_folder_list(num_folders, folder_size),
                                     random_osd_assignment=True,
                                     ignore_osd_capacities=False,
                                     osd_information=create_osd_information(num_osds, osds_capacities),
                                     capacity=osd_capacity_key)

            max_osd_total_folder_size = max(max(self._total_folder_sizes(distribution)),
                                            max_osd_total_folder_size)

        self.assertLessEqual(max_osd_total_folder_size, osds_capacities[0])

    def test_random_round_robin_distribution(self):
        """Random assignment with ``ignore_folder_sizes=True``: loads are unequal and runs differ.

        10 folders of size 1 are placed on 3 OSDs. In every round the largest and smallest OSD
        load must differ, and over 100 rounds two independent distributions must differ at least
        once in the first folder key of their first OSD.
        """
        num_osds = 3
        osd_capacities = [0]
        num_folders = 10
        folder_sizes = [1]

        a_b_different = False

        for _ in range(100):
            distribution_a = dataDistribution.DataDistribution()
            distribution_a.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution_a.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                       random_osd_assignment=True,
                                       ignore_folder_sizes=True)
            distribution_b = dataDistribution.DataDistribution()
            distribution_b.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
            distribution_b.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                       random_osd_assignment=True,
                                       ignore_folder_sizes=True)

            # Compare by value: the original used `is not`, which tests object identity and only
            # coincides with inequality for the small integers CPython happens to cache.
            total_folder_sizes_a = self._total_folder_sizes(distribution_a)
            self.assertNotEqual(max(total_folder_sizes_a), min(total_folder_sizes_a))

            total_folder_sizes_b = self._total_folder_sizes(distribution_b)
            self.assertNotEqual(max(total_folder_sizes_b), min(total_folder_sizes_b))

            first_osd_a = list(distribution_a.OSDs.values())[0]
            first_osd_b = list(distribution_b.OSDs.values())[0]
            if list(first_osd_a.folders.keys())[0] != list(first_osd_b.folders.keys())[0]:
                a_b_different = True

        self.assertTrue(a_b_different)

    def test_lpt_distribution(self):
        """Default ``add_folders`` (no random assignment) yields the expected load ratios.

        First scenario: all OSD loads must be equal. Second scenario: the largest OSD load must
        be exactly twice the smallest.
        """
        folder_sizes = [3, 7, 11]
        num_folders = 4
        num_osds = 4
        osd_capacities = [0]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))

        total_folder_sizes = self._total_folder_sizes(distribution)
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))

        osd_capacities = [10, 20]
        folder_sizes = [4, 4, 4]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))

        total_folder_sizes = self._total_folder_sizes(distribution)
        self.assertEqual(2 * min(total_folder_sizes), max(total_folder_sizes))

    def test_average_osd_load(self):
        """``get_average_osd_load`` must equal total folder size divided by total OSD capacity."""
        folder_sizes = [49, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_capacities = [100, 150]

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        # NOTE(review): OSD information and the capacity key are passed positionally here, while
        # other tests pass them as `osd_information=` / `capacity=` keywords - confirm the
        # positions match the signature of add_folders.
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 create_osd_information(num_osds, osd_capacities),
                                 osd_capacity_key)

        # Every size occurs num_folders times and every capacity occurs num_osds times.
        total_folder_size = sum(folder_sizes) * num_folders
        total_capacity = sum(osd_capacities) * num_osds
        average = total_folder_size / total_capacity
        self.assertEqual(average,
                         distribution.get_average_osd_load(create_osd_information(num_osds, osd_capacities),
                                                           osd_capacity_key))

    def test_average_total_folder_size(self):
        """``get_average_total_folder_size`` must equal total folder size divided by OSD count."""
        folder_sizes = [49, 123, 1, 7]
        num_folders = 2
        num_osds = 4
        osd_capacities = [100, 150]
        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        # NOTE(review): positional arguments as in test_average_osd_load - confirm against the
        # signature of add_folders.
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes),
                                 create_osd_information(num_osds, osd_capacities),
                                 osd_capacity_key)

        # create_test_osd_list produces one OSD per (index, capacity) pair.
        osd_count = num_osds * len(osd_capacities)
        average = (sum(folder_sizes) * num_folders) / osd_count
        self.assertEqual(average, distribution.get_average_total_folder_size())

    def test_rebalance_lpt(self):
        """``rebalance_lpt`` after a random assignment must leave all OSD loads equal."""
        folder_sizes = [1]
        num_folders = 8
        osd_capacities = [10]
        num_osds = 4

        distribution = dataDistribution.DataDistribution()
        distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
        distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
        distribution.rebalance_lpt()

        total_folder_sizes = self._total_folder_sizes(distribution)
        # we should obtain a perfectly balanced distribution
        self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))


208
def create_test_osd_list(num_osds, osd_capacities):
    """Return the OSD ids for every (index, capacity) combination.

    For each index in ``range(num_osds)`` one id per entry of ``osd_capacities`` is generated
    with ``create_osd_id``, keeping the capacities in their given order.
    """
    return [create_osd_id(capacity, index)
            for index in range(num_osds)
            for capacity in osd_capacities]


216
def create_test_folder_list(num_folders, folder_sizes):
    """Return ``folder.Folder`` objects for every (index, size) combination.

    For each index in ``range(num_folders)`` one folder per entry of ``folder_sizes`` is built.
    Its id is ``folder_id_prefix``, the size and the index joined by underscores; the size is
    passed as second constructor argument and ``None`` as third.
    """
    return [folder.Folder("_".join((folder_id_prefix, str(size), str(index))), size, None)
            for index in range(num_folders)
            for size in folder_sizes]


225
def create_osd_information(num_osds, osd_capacities):
    """Return a ``{osd id: {osd_capacity_key: capacity}}`` mapping.

    The ids are generated with ``create_osd_id`` for every index in ``range(num_osds)`` and
    every entry of ``osd_capacities``, i.e. the same ids ``create_test_osd_list`` produces.
    """
    return {create_osd_id(capacity, index): {osd_capacity_key: capacity}
            for index in range(num_osds)
            for capacity in osd_capacities}
233
234
235
236


def create_osd_id(capacity, index):
    """Return the test OSD id: ``osd_id_prefix``, capacity and index joined by underscores."""
    parts = (osd_id_prefix, str(capacity), str(index))
    return "_".join(parts)