dataDistribution.py 9.67 KB
Newer Older
1
2
import random

Felix Seibert's avatar
Felix Seibert committed
3
from xtreemfs_client import osd
4
from xtreemfs_client import folder
5

6

Felix Seibert's avatar
Felix Seibert committed
7
8
9
10
class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.
11

Felix Seibert's avatar
Felix Seibert committed
12
13
    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder
    gets mapped to one OSD).
Felix Seibert's avatar
Felix Seibert committed
14
    """
15
16

    def __init__(self):
17
        self.OSDs = {}
18

19
    def add_new_osd(self, osd_uuid):
Felix Seibert's avatar
Felix Seibert committed
20
21
22
        """
        create a new empty osd and add it to the existing OSDs.
        """
23
24
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
25
            return
26
27
        new_osd = osd.OSD(osd_uuid)
        self.OSDs[osd_uuid] = new_osd
28

29
    def add_osd(self, new_osd):
Felix Seibert's avatar
Felix Seibert committed
30
31
32
        """
        add the given OSD (object) to the existing OSDs.
        """
33
34
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
35
            return
36
        self.OSDs[new_osd.uuid] = new_osd
37

38
    def add_osd_list(self, osd_list):
Felix Seibert's avatar
Felix Seibert committed
39
40
41
        """
        add the given list of OSDs (objects) to the existing OSDs.
        """
42
43
44
45
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                new_osd = osd.OSD(osd_uuid)
                self.OSDs[osd_uuid] = new_osd
46
47

    def get_osd_list(self):
Felix Seibert's avatar
Felix Seibert committed
48
        """
49
        get a list of all existing OSD uuids.
Felix Seibert's avatar
Felix Seibert committed
50
        """
51
        osd_list = []
52
        for osd_name in self.OSDs.keys():
53
54
55
            osd_list.append(osd_name)
        return osd_list

56
    def get_containing_osd(self, folder_id):
Felix Seibert's avatar
Felix Seibert committed
57
58
59
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
60
61
62
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
63
64
        return None

Felix Seibert's avatar
Felix Seibert committed
65
66
67
68
69
70
71
72
73
74
75
    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd. if folder_id already is assigned to an OSD, this old assignment is deleted.
        """
        old_osd = self.get_containing_osd(folder_id)
        if old_osd is None:
            self.OSDs[new_osd].add_folder(folder_id, self.get_average_folder_size())
        else:
            self.OSDs[new_osd].add_folder(folder_id, self.OSDs[old_osd.uuid].folders[folder_id])
            self.OSDs[old_osd.uuid].remove_folder(folder_id)

76
    def get_average_folder_size(self):
Felix Seibert's avatar
Felix Seibert committed
77
78
79
        """
        get the average folder size of all folders of all OSDs.
        """
80
        total_size = 0
81
        total_number_of_folders = 0
82
        for one_osd in self.OSDs.values():
Felix Seibert's avatar
Felix Seibert committed
83
            total_size += one_osd.total_folder_size
84
            total_number_of_folders += len(one_osd.folders)
85
86
        if total_number_of_folders == 0:
            return 0
87
88
        return total_size / total_number_of_folders

89
90
91
92
    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
93
94
                    ignore_folder_sizes=False,
                    debug=False):
Felix Seibert's avatar
Felix Seibert committed
95
96
        """
        adds a list of folders to the data distribution.
Felix Seibert's avatar
Felix Seibert committed
97
        if not specified otherwise, the assignments are calculated using the LPT algorithm.
Felix Seibert's avatar
Felix Seibert committed
98
99
        returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
        if osd_information and ratio_parameter are given,
        OSDs are assigned data proportionally to their ratio_parameter.

        osd_information is a map (that we now call outer map) that contains, for each OSD, an inner_map.
        outer_map[osd_uuid][ratio_parameter] is used to calculate the proportion of data assigned to OSD with uuid
        osd_uuid.

        if ignore_osd_capacities=False,
        outer_map[osd_uuid][capacity] is used (only in combination with random_osd_assignment=True)
        to generate random assignments that do not surpass the capacities of the OSDs.
        (random assignment respecting OSD capacities)

        if random_osd_assignment=True and ignore_osd_capacities=True, a totally random OSD assignment generated.

        if random_osd_assignment=True and ignore_folder_sizes=True,
        folders are randomly assigned to OSDs such that all OSDs have the same number of folders (if possible).
116
117

        the assignment is stable (i.e., folders already assigned to an OSD are not reassigned to another OSD).
Felix Seibert's avatar
Felix Seibert committed
118
        """
119
120

        # find out which folders are not assigned yet
121
        new_folders = []
122
123
        for a_folder in folders:
            containing_osd = self.get_containing_osd(a_folder.id)
124
            if containing_osd is not None:
125
                containing_osd.add_folder(a_folder.id, a_folder.size)
126
            else:
127
                new_folders.append(a_folder)
128

129
130
        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))
Felix Seibert's avatar
Felix Seibert committed
131

132
133
        # keep track of which unassigned folder gets assigned to which OSD.
        # this information must be returned
134
135
        osds_for_new_folders = []

136
137
        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
138
        if random_osd_assignment and ignore_osd_capacities and not ignore_folder_sizes:
139
140
            if debug:
                print("using totally random osd assignment")
141
            for a_folder in new_folders:
Felix Seibert's avatar
Felix Seibert committed
142
                random_osd = random.choice(list(self.OSDs.values()))
143
144
                random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
Felix Seibert's avatar
Felix Seibert committed
145
                                             random_osd.uuid))
Felix Seibert's avatar
Felix Seibert committed
146
            return osds_for_new_folders
147

148
149
        # random OSD assignment respecting OSD capacities
        elif random_osd_assignment and not ignore_osd_capacities:
150
            if osd_information is None or capacity == '':
151
152
                raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is"
                                 "not given!")
153
154
            if debug:
                print("using random osd assignment, respecting osd capacities")
155
            for a_folder in new_folders:
156
                suitable_osds = []  # list of OSDs with enough capacity
157
                for one_osd in self.OSDs.values():
158
                    if osd_information[one_osd.uuid][capacity] - one_osd.total_folder_size - a_folder.size >= 0:
159
160
                        suitable_osds.append(one_osd)
                suitable_random_osd = random.choice(suitable_osds)
161
162
                suitable_random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id,
163
164
165
                                             suitable_random_osd.uuid))
            return osds_for_new_folders

166
167
        # random OSD assignment ignoring folder sizes
        elif random_osd_assignment and ignore_folder_sizes:
168
169
170
171
172
173
174
175
176
177
            if debug:
                print("using random osd assignment ignoring folder sizes")

            average_folder_size = self.get_average_folder_size()
            if average_folder_size == 0:
                average_folder_size = 1

            modified_folders = list(map(lambda f: folder.Folder(f.id, average_folder_size, f.origin), folders))
            random.shuffle(modified_folders)
            return self.add_folders(modified_folders)
178

179
180
        # balanced deterministic OSD assignment (LPT)
        # (following largest processing time first, also called post-greedy approach)
181
182
        list.sort(new_folders, key=lambda x: x.size, reverse=True)

183
        osd_ratios = {}
184
185

        # ratios are given - use them to assign proportionally
186
187
188
189
190
191
192
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = 0
            for osd_size in osd_information.values():
                total_osd_size += osd_size[ratio_parameter]
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)

193
        # no ratios are given - assume that all OSDs have same capacity
194
195
196
197
        else:
            for osd_uuid in self.OSDs.keys():
                osd_ratios[osd_uuid] = float(1)

198
        for a_folder in new_folders:
199
200
201
            least_used_osd = None
            for one_osd in self.OSDs.values():
                if (least_used_osd is None) or \
202
                                        one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
Felix Seibert's avatar
Felix Seibert committed
203
                                <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
204
                    least_used_osd = one_osd
205
206
            least_used_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id,
207
                                         least_used_osd.uuid))
208
209
210
        return osds_for_new_folders

    def update_folder(self, folder, size):
Felix Seibert's avatar
Felix Seibert committed
211
212
213
        """
        updates the size of a given folder
        """
214
215
216
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders.keys():
                one_osd.update_folder(folder, size)
217
218

    def description(self):
Felix Seibert's avatar
Felix Seibert committed
219
220
221
        """
        generates a string describing this data distribution
        """
222
        string = ""
223
224
        for one_osd in self.OSDs.values():
            string += str(one_osd)
225
            string += "\n"
226
            string += "folders : " + str(one_osd.folders)
227
            string += "\n"
228
        string += "average folder size: " + str(self.get_average_folder_size())
229
230
231
        return string

    def __str__(self):
232
233
234
235
236
        string_representation = "DataDistribution has " + str(len(self.OSDs)) \
                                + " osds: \n"
        for key, value in self.OSDs.items():
            string_representation += str(value) + " \n"
        return string_representation