dataDistribution.py 10.2 KB
Newer Older
1
2
import random

Felix Seibert's avatar
Felix Seibert committed
3
from xtreemfs_client import osd
4
from xtreemfs_client import folder
5

6

Felix Seibert's avatar
Felix Seibert committed
7
8
9
10
class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.
11

Felix Seibert's avatar
Felix Seibert committed
12
13
    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder
    gets mapped to one OSD).
Felix Seibert's avatar
Felix Seibert committed
14
    """
15
16

    def __init__(self):
17
        self.OSDs = {}
18

19
    def add_new_osd(self, osd_uuid):
Felix Seibert's avatar
Felix Seibert committed
20
21
22
        """
        create a new empty osd and add it to the existing OSDs.
        """
23
24
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
25
            return
26
27
        new_osd = osd.OSD(osd_uuid)
        self.OSDs[osd_uuid] = new_osd
28

29
    def add_osd(self, new_osd):
Felix Seibert's avatar
Felix Seibert committed
30
31
32
        """
        add the given OSD (object) to the existing OSDs.
        """
33
34
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
35
            return
36
        self.OSDs[new_osd.uuid] = new_osd
37

38
    def add_osd_list(self, osd_list):
Felix Seibert's avatar
Felix Seibert committed
39
40
41
        """
        add the given list of OSDs (objects) to the existing OSDs.
        """
42
43
44
45
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                new_osd = osd.OSD(osd_uuid)
                self.OSDs[osd_uuid] = new_osd
46
47

    def get_osd_list(self):
Felix Seibert's avatar
Felix Seibert committed
48
        """
49
        get a list of all existing OSD uuids.
Felix Seibert's avatar
Felix Seibert committed
50
        """
51
        osd_list = []
52
        for osd_name in self.OSDs.keys():
53
54
55
            osd_list.append(osd_name)
        return osd_list

56
    def get_containing_osd(self, folder_id):
Felix Seibert's avatar
Felix Seibert committed
57
58
59
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
60
61
62
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
63
64
        return None

Felix Seibert's avatar
Felix Seibert committed
65
66
67
68
69
70
71
72
73
74
75
    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd. if folder_id already is assigned to an OSD, this old assignment is deleted.
        """
        old_osd = self.get_containing_osd(folder_id)
        if old_osd is None:
            self.OSDs[new_osd].add_folder(folder_id, self.get_average_folder_size())
        else:
            self.OSDs[new_osd].add_folder(folder_id, self.OSDs[old_osd.uuid].folders[folder_id])
            self.OSDs[old_osd.uuid].remove_folder(folder_id)

76
    def get_average_folder_size(self):
Felix Seibert's avatar
Felix Seibert committed
77
78
79
        """
        get the average folder size of all folders of all OSDs.
        """
80
        total_size = 0
81
        total_number_of_folders = 0
82
        for one_osd in self.OSDs.values():
Felix Seibert's avatar
Felix Seibert committed
83
            total_size += one_osd.total_folder_size
84
            total_number_of_folders += len(one_osd.folders)
85
86
        if total_number_of_folders == 0:
            return 0
87
88
        return total_size / total_number_of_folders

89
90
91
92
93
94
95
96
97
98
99
100
    def get_average_osd_load(self, osd_information, capacity):
        """
        calculate the average OSD load, that is,
        the ratio between the sum of all folder sizes and the total OSD capacity.
        """
        total_folder_size = 0
        total_osd_capacity = 0
        for osd_uuid in self.OSDs.keys():
            total_folder_size += self.OSDs[osd_uuid].total_folder_size
            total_osd_capacity += osd_information[osd_uuid][capacity]
        return total_folder_size / total_osd_capacity

101
102
103
104
    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
105
106
                    ignore_folder_sizes=False,
                    debug=False):
Felix Seibert's avatar
Felix Seibert committed
107
108
        """
        adds a list of folders to the data distribution.
Felix Seibert's avatar
Felix Seibert committed
109
        if not specified otherwise, the assignments are calculated using the LPT algorithm.
Felix Seibert's avatar
Felix Seibert committed
110
111
        returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.

112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
        if osd_information and ratio_parameter are given,
        OSDs are assigned data proportionally to their ratio_parameter.

        osd_information is a map (that we now call outer map) that contains, for each OSD, an inner_map.
        outer_map[osd_uuid][ratio_parameter] is used to calculate the proportion of data assigned to OSD with uuid
        osd_uuid.

        if ignore_osd_capacities=False,
        outer_map[osd_uuid][capacity] is used (only in combination with random_osd_assignment=True)
        to generate random assignments that do not surpass the capacities of the OSDs.
        (random assignment respecting OSD capacities)

        if random_osd_assignment=True and ignore_osd_capacities=True, a totally random OSD assignment generated.

        if random_osd_assignment=True and ignore_folder_sizes=True,
        folders are randomly assigned to OSDs such that all OSDs have the same number of folders (if possible).
128
129

        the assignment is stable (i.e., folders already assigned to an OSD are not reassigned to another OSD).
Felix Seibert's avatar
Felix Seibert committed
130
        """
131
132

        # find out which folders are not assigned yet
133
        new_folders = []
134
135
        for a_folder in folders:
            containing_osd = self.get_containing_osd(a_folder.id)
136
            if containing_osd is not None:
137
                containing_osd.add_folder(a_folder.id, a_folder.size)
138
            else:
139
                new_folders.append(a_folder)
140

141
142
        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))
Felix Seibert's avatar
Felix Seibert committed
143

144
145
        # keep track of which unassigned folder gets assigned to which OSD.
        # this information must be returned
146
147
        osds_for_new_folders = []

148
149
        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
150
        if random_osd_assignment and ignore_osd_capacities and not ignore_folder_sizes:
151
152
            if debug:
                print("using totally random osd assignment")
153
            for a_folder in new_folders:
Felix Seibert's avatar
Felix Seibert committed
154
                random_osd = random.choice(list(self.OSDs.values()))
155
156
                random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
Felix Seibert's avatar
Felix Seibert committed
157
                                             random_osd.uuid))
Felix Seibert's avatar
Felix Seibert committed
158
            return osds_for_new_folders
159

160
161
        # random OSD assignment respecting OSD capacities
        elif random_osd_assignment and not ignore_osd_capacities:
162
            if osd_information is None or capacity == '':
163
164
                raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is"
                                 "not given!")
165
166
            if debug:
                print("using random osd assignment, respecting osd capacities")
167
            for a_folder in new_folders:
168
                suitable_osds = []  # list of OSDs with enough capacity
169
                for one_osd in self.OSDs.values():
170
                    if osd_information[one_osd.uuid][capacity] - one_osd.total_folder_size - a_folder.size >= 0:
171
172
                        suitable_osds.append(one_osd)
                suitable_random_osd = random.choice(suitable_osds)
173
174
                suitable_random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
175
176
177
                                             suitable_random_osd.uuid))
            return osds_for_new_folders

178
179
        # random OSD assignment ignoring folder sizes
        elif random_osd_assignment and ignore_folder_sizes:
180
181
182
183
184
185
186
187
188
189
            if debug:
                print("using random osd assignment ignoring folder sizes")

            average_folder_size = self.get_average_folder_size()
            if average_folder_size == 0:
                average_folder_size = 1

            modified_folders = list(map(lambda f: folder.Folder(f.id, average_folder_size, f.origin), folders))
            random.shuffle(modified_folders)
            return self.add_folders(modified_folders)
190

191
192
        # balanced deterministic OSD assignment (LPT)
        # (following largest processing time first, also called post-greedy approach)
193
194
        list.sort(new_folders, key=lambda x: x.size, reverse=True)

195
        osd_ratios = {}
196
197

        # ratios are given - use them to assign proportionally
198
199
200
201
202
203
204
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = 0
            for osd_size in osd_information.values():
                total_osd_size += osd_size[ratio_parameter]
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)

205
        # no ratios are given - assume that all OSDs have same capacity
206
207
208
209
        else:
            for osd_uuid in self.OSDs.keys():
                osd_ratios[osd_uuid] = float(1)

210
        for a_folder in new_folders:
211
212
213
            least_used_osd = None
            for one_osd in self.OSDs.values():
                if (least_used_osd is None) or \
214
                                        one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
Felix Seibert's avatar
Felix Seibert committed
215
                                <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
216
                    least_used_osd = one_osd
217
218
            least_used_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id,
219
                                         least_used_osd.uuid))
220
221
222
        return osds_for_new_folders

    def update_folder(self, folder, size):
Felix Seibert's avatar
Felix Seibert committed
223
224
225
        """
        updates the size of a given folder
        """
226
227
228
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders.keys():
                one_osd.update_folder(folder, size)
229
230

    def description(self):
Felix Seibert's avatar
Felix Seibert committed
231
232
233
        """
        generates a string describing this data distribution
        """
234
        string = ""
235
236
        for one_osd in self.OSDs.values():
            string += str(one_osd)
237
            string += "\n"
238
            string += "folders : " + str(one_osd.folders)
239
            string += "\n"
240
        string += "average folder size: " + str(self.get_average_folder_size())
241
242
243
        return string

    def __str__(self):
244
245
246
247
248
        string_representation = "DataDistribution has " + str(len(self.OSDs)) \
                                + " osds: \n"
        for key, value in self.OSDs.items():
            string_representation += str(value) + " \n"
        return string_representation