dataDistribution.py 10.5 KB
Newer Older
1
2
import random

Felix Seibert's avatar
Felix Seibert committed
3
from xtreemfs_client import osd
4
from xtreemfs_client import folder
5

6

Felix Seibert's avatar
Felix Seibert committed
7
8
9
10
class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.
11

Felix Seibert's avatar
Felix Seibert committed
12
13
    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder
    gets mapped to one OSD).
Felix Seibert's avatar
Felix Seibert committed
14
    """
15
16

    def __init__(self):
17
        self.OSDs = {}
18

19
    def add_new_osd(self, osd_uuid):
Felix Seibert's avatar
Felix Seibert committed
20
21
22
        """
        create a new empty osd and add it to the existing OSDs.
        """
23
24
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
25
            return
26
27
        new_osd = osd.OSD(osd_uuid)
        self.OSDs[osd_uuid] = new_osd
28

29
    def add_osd(self, new_osd):
Felix Seibert's avatar
Felix Seibert committed
30
31
32
        """
        add the given OSD (object) to the existing OSDs.
        """
33
34
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
35
            return
36
        self.OSDs[new_osd.uuid] = new_osd
37

38
    def add_osd_list(self, osd_list):
Felix Seibert's avatar
Felix Seibert committed
39
40
41
        """
        add the given list of OSDs (objects) to the existing OSDs.
        """
42
43
44
45
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                new_osd = osd.OSD(osd_uuid)
                self.OSDs[osd_uuid] = new_osd
46
47

    def get_osd_list(self):
Felix Seibert's avatar
Felix Seibert committed
48
        """
49
        get a list of all existing OSD uuids.
Felix Seibert's avatar
Felix Seibert committed
50
        """
51
        osd_list = []
52
        for osd_name in self.OSDs.keys():
53
54
55
            osd_list.append(osd_name)
        return osd_list

56
    def get_containing_osd(self, folder_id):
Felix Seibert's avatar
Felix Seibert committed
57
58
59
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
60
61
62
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
63
64
        return None

Felix Seibert's avatar
Felix Seibert committed
65
66
67
68
69
70
71
72
73
74
75
    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd. if folder_id already is assigned to an OSD, this old assignment is deleted.
        """
        old_osd = self.get_containing_osd(folder_id)
        if old_osd is None:
            self.OSDs[new_osd].add_folder(folder_id, self.get_average_folder_size())
        else:
            self.OSDs[new_osd].add_folder(folder_id, self.OSDs[old_osd.uuid].folders[folder_id])
            self.OSDs[old_osd.uuid].remove_folder(folder_id)

76
    def get_average_folder_size(self):
Felix Seibert's avatar
Felix Seibert committed
77
78
79
        """
        get the average folder size of all folders of all OSDs.
        """
80
        total_size = 0
81
        total_number_of_folders = 0
82
        for one_osd in self.OSDs.values():
Felix Seibert's avatar
Felix Seibert committed
83
            total_size += one_osd.total_folder_size
84
            total_number_of_folders += len(one_osd.folders)
85
86
        if total_number_of_folders == 0:
            return 0
87
88
        return total_size / total_number_of_folders

89
90
91
92
93
94
95
96
97
98
99
100
    def get_average_osd_load(self, osd_information, capacity):
        """
        calculate the average OSD load, that is,
        the ratio between the sum of all folder sizes and the total OSD capacity.
        """
        total_folder_size = 0
        total_osd_capacity = 0
        for osd_uuid in self.OSDs.keys():
            total_folder_size += self.OSDs[osd_uuid].total_folder_size
            total_osd_capacity += osd_information[osd_uuid][capacity]
        return total_folder_size / total_osd_capacity

101
102
103
104
105
106
107
108
109
110
111
112
    def get_average_total_folder_size(self):
        """
        calculate the average total_folder_size of the OSDs.
        """
        total_folder_size = 0
        num_osds = 0
        for osd in self.OSDs.values():
            total_folder_size += osd.total_folder_size
            num_osds += 1

        return total_folder_size / num_osds

113
114
115
116
    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
117
118
                    ignore_folder_sizes=False,
                    debug=False):
Felix Seibert's avatar
Felix Seibert committed
119
120
        """
        adds a list of folders to the data distribution.
Felix Seibert's avatar
Felix Seibert committed
121
        if not specified otherwise, the assignments are calculated using the LPT algorithm.
Felix Seibert's avatar
Felix Seibert committed
122
123
        returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
        if osd_information and ratio_parameter are given,
        OSDs are assigned data proportionally to their ratio_parameter.

        osd_information is a map (that we now call outer map) that contains, for each OSD, an inner_map.
        outer_map[osd_uuid][ratio_parameter] is used to calculate the proportion of data assigned to OSD with uuid
        osd_uuid.

        if ignore_osd_capacities=False,
        outer_map[osd_uuid][capacity] is used (only in combination with random_osd_assignment=True)
        to generate random assignments that do not surpass the capacities of the OSDs.
        (random assignment respecting OSD capacities)

        if random_osd_assignment=True and ignore_osd_capacities=True, a totally random OSD assignment generated.

        if random_osd_assignment=True and ignore_folder_sizes=True,
        folders are randomly assigned to OSDs such that all OSDs have the same number of folders (if possible).
140
141

        the assignment is stable (i.e., folders already assigned to an OSD are not reassigned to another OSD).
Felix Seibert's avatar
Felix Seibert committed
142
        """
143
144

        # find out which folders are not assigned yet
145
        new_folders = []
146
147
        for a_folder in folders:
            containing_osd = self.get_containing_osd(a_folder.id)
148
            if containing_osd is not None:
149
                containing_osd.add_folder(a_folder.id, a_folder.size)
150
            else:
151
                new_folders.append(a_folder)
152

153
154
        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))
Felix Seibert's avatar
Felix Seibert committed
155

156
157
        # keep track of which unassigned folder gets assigned to which OSD.
        # this information must be returned
158
159
        osds_for_new_folders = []

160
161
        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
162
        if random_osd_assignment and ignore_osd_capacities and not ignore_folder_sizes:
163
164
            if debug:
                print("using totally random osd assignment")
165
            for a_folder in new_folders:
Felix Seibert's avatar
Felix Seibert committed
166
                random_osd = random.choice(list(self.OSDs.values()))
167
168
                random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
Felix Seibert's avatar
Felix Seibert committed
169
                                             random_osd.uuid))
Felix Seibert's avatar
Felix Seibert committed
170
            return osds_for_new_folders
171

172
173
        # random OSD assignment respecting OSD capacities
        elif random_osd_assignment and not ignore_osd_capacities:
174
            if osd_information is None or capacity == '':
175
176
                raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is"
                                 "not given!")
177
178
            if debug:
                print("using random osd assignment, respecting osd capacities")
179
            for a_folder in new_folders:
180
                suitable_osds = []  # list of OSDs with enough capacity
181
                for one_osd in self.OSDs.values():
182
                    if osd_information[one_osd.uuid][capacity] - one_osd.total_folder_size - a_folder.size >= 0:
183
184
                        suitable_osds.append(one_osd)
                suitable_random_osd = random.choice(suitable_osds)
185
186
                suitable_random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
187
188
189
                                             suitable_random_osd.uuid))
            return osds_for_new_folders

190
191
        # random OSD assignment ignoring folder sizes
        elif random_osd_assignment and ignore_folder_sizes:
192
193
194
195
196
197
198
199
200
201
            if debug:
                print("using random osd assignment ignoring folder sizes")

            average_folder_size = self.get_average_folder_size()
            if average_folder_size == 0:
                average_folder_size = 1

            modified_folders = list(map(lambda f: folder.Folder(f.id, average_folder_size, f.origin), folders))
            random.shuffle(modified_folders)
            return self.add_folders(modified_folders)
202

203
204
        # balanced deterministic OSD assignment (LPT)
        # (following largest processing time first, also called post-greedy approach)
205
206
        list.sort(new_folders, key=lambda x: x.size, reverse=True)

207
        osd_ratios = {}
208
209

        # ratios are given - use them to assign proportionally
210
211
212
213
214
215
216
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = 0
            for osd_size in osd_information.values():
                total_osd_size += osd_size[ratio_parameter]
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)

217
        # no ratios are given - assume that all OSDs have same capacity
218
219
220
221
        else:
            for osd_uuid in self.OSDs.keys():
                osd_ratios[osd_uuid] = float(1)

222
        for a_folder in new_folders:
223
224
225
            least_used_osd = None
            for one_osd in self.OSDs.values():
                if (least_used_osd is None) or \
226
                                        one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
Felix Seibert's avatar
Felix Seibert committed
227
                                <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
228
                    least_used_osd = one_osd
229
230
            least_used_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id,
231
                                         least_used_osd.uuid))
232
233
234
        return osds_for_new_folders

    def update_folder(self, folder, size):
Felix Seibert's avatar
Felix Seibert committed
235
236
237
        """
        updates the size of a given folder
        """
238
239
240
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders.keys():
                one_osd.update_folder(folder, size)
241
242

    def description(self):
Felix Seibert's avatar
Felix Seibert committed
243
244
245
        """
        generates a string describing this data distribution
        """
246
        string = ""
247
248
        for one_osd in self.OSDs.values():
            string += str(one_osd)
249
            string += "\n"
250
            string += "folders : " + str(one_osd.folders)
251
            string += "\n"
252
        string += "average folder size: " + str(self.get_average_folder_size())
253
254
255
        return string

    def __str__(self):
256
257
258
259
260
        string_representation = "DataDistribution has " + str(len(self.OSDs)) \
                                + " osds: \n"
        for key, value in self.OSDs.items():
            string_representation += str(value) + " \n"
        return string_representation