dataDistribution.py 8.56 KB
Newer Older
1
2
import random

Felix Seibert's avatar
Felix Seibert committed
3
from xtreemfs_client import osd
4
from xtreemfs_client import folder
5

6

Felix Seibert's avatar
Felix Seibert committed
7
8
9
10
class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.

    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs
    (each folder gets mapped to one OSD).

    OSD objects are only used through the following members: uuid, folders (dict keyed by folder id),
    total_folder_size, contains_folder(), add_folder(), remove_folder() and update_folder().
    """

    def __init__(self):
        # maps OSD uuid -> OSD object
        self.OSDs = {}

    def add_new_osd(self, osd_uuid):
        """
        create a new empty osd and add it to the existing OSDs.

        if an OSD with the given uuid is already known, a message is printed and nothing is changed.
        """
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
            return

        self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def add_osd(self, new_osd):
        """
        add the given OSD (object) to the existing OSDs, using new_osd.uuid as key.

        if an OSD with the same uuid is already known, a message is printed and nothing is changed.
        """
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
            return

        self.OSDs[new_osd.uuid] = new_osd

    def add_osd_list(self, osd_list):
        """
        create a new empty OSD for each uuid in osd_list that is not known yet.

        note that osd_list contains OSD uuids, not OSD objects. uuids that are already present are
        skipped silently.
        """
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def get_osd_list(self):
        """
        get a list of all existing OSD uuids.
        """
        return list(self.OSDs)

    def get_containing_osd(self, folder_id):
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd

        return None

    def get_average_folder_size(self):
        """
        get the average folder size of all folders of all OSDs.

        returns 0 if no OSD holds any folder.
        """
        total_size = sum(one_osd.total_folder_size for one_osd in self.OSDs.values())
        total_number_of_folders = sum(len(one_osd.folders) for one_osd in self.OSDs.values())

        if total_number_of_folders == 0:
            return 0

        return total_size / total_number_of_folders

    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd. if folder_id already is assigned to an OSD, this old assignment is deleted.

        new_osd is the uuid of the target OSD (it is used as key into self.OSDs), not an OSD object.
        a folder that was not assigned before is added with the current average folder size; a folder
        that is moved keeps the size recorded on its old OSD.

        raises KeyError if new_osd is not a known OSD uuid.
        """
        target_osd = self.OSDs[new_osd]
        old_osd = self.get_containing_osd(folder_id)

        if old_osd is None:
            target_osd.add_folder(folder_id, self.get_average_folder_size())
            return

        if old_osd is target_osd:
            # the folder already lives on the requested OSD. adding it and then removing it
            # on the same object would leave the folder without any assignment.
            return

        target_osd.add_folder(folder_id, old_osd.folders[folder_id])
        old_osd.remove_folder(folder_id)

    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
                    ignore_folder_sizes=False,
                    debug=False):
        """
        adds a list of folders to the data distribution.

        folders that are already assigned to an OSD stay there (the assignment is stable); they are
        passed to add_folder() of their current OSD with the given size. all other folders are
        assigned according to the selected strategy:

        - default: balanced deterministic assignment (largest folder first, each folder goes to the
          OSD with the lowest load relative to its ratio). if osd_information and ratio_parameter are
          given, the ratio of an OSD is osd_information[uuid][ratio_parameter] divided by the sum over
          all entries of osd_information; otherwise all OSDs have the same ratio.
        - random_osd_assignment, ignore_osd_capacities, not ignore_folder_sizes: uniformly random OSD.
        - random_osd_assignment, not ignore_osd_capacities: uniformly random OSD among those for which
          osd_information[uuid][capacity] - total_folder_size - folder size >= 0.
        - random_osd_assignment, ignore_osd_capacities, ignore_folder_sizes: every folder is replaced
          by a copy having the current average folder size (1 if that average is 0), the copies are
          shuffled and added with the default strategy and default arguments.

        each folder needs the attributes id and size (and origin for the last strategy).

        returns a list of (folder id, OSD uuid) tuples for the folders for which there was previously
        no assignment.

        raises ValueError if capacities are to be respected but osd_information or capacity is missing.
        raises IndexError (from random.choice) if a random strategy has no OSD to choose from.
        """
        new_folders = []
        for a_folder in folders:
            containing_osd = self.get_containing_osd(a_folder.id)
            if containing_osd is not None:
                containing_osd.add_folder(a_folder.id, a_folder.size)
            else:
                new_folders.append(a_folder)

        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))

        osds_for_new_folders = []

        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
        if random_osd_assignment and ignore_osd_capacities and not ignore_folder_sizes:
            if debug:
                print("using totally random osd assignment")
            for a_folder in new_folders:
                random_osd = random.choice(list(self.OSDs.values()))
                random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id, random_osd.uuid))
            return osds_for_new_folders

        # random OSD assignment respecting OSD capacities
        elif random_osd_assignment and not ignore_osd_capacities:
            if osd_information is None or capacity == '':
                raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is "
                                 "not given!")
            if debug:
                print("using random osd assignment, respecting osd capacities")
            for a_folder in new_folders:
                # OSDs with enough remaining capacity for this folder
                suitable_osds = [one_osd for one_osd in self.OSDs.values()
                                 if osd_information[one_osd.uuid][capacity]
                                 - one_osd.total_folder_size - a_folder.size >= 0]
                suitable_random_osd = random.choice(suitable_osds)
                suitable_random_osd.add_folder(a_folder.id, a_folder.size)
                osds_for_new_folders.append((a_folder.id, suitable_random_osd.uuid))
            return osds_for_new_folders

        # random OSD assignment ignoring folder sizes
        elif random_osd_assignment and ignore_folder_sizes:
            if debug:
                print("using random osd assignment ignoring folder sizes")

            average_folder_size = self.get_average_folder_size()
            if average_folder_size == 0:
                average_folder_size = 1

            # NOTE(review): all given folders are re-submitted here, not only new_folders, so folders
            # that already have an OSD pass through add_folder() a second time (with the average
            # size) - confirm that this is intended.
            modified_folders = [folder.Folder(f.id, average_folder_size, f.origin) for f in folders]
            random.shuffle(modified_folders)
            return self.add_folders(modified_folders)

        # balanced deterministic OSD assignment
        # (following largest processing time first or post-greedy approach)
        new_folders.sort(key=lambda x: x.size, reverse=True)

        osd_ratios = {}
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = sum(osd_size[ratio_parameter] for osd_size in osd_information.values())
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)
        else:
            for osd_uuid in self.OSDs:
                osd_ratios[osd_uuid] = float(1)

        for a_folder in new_folders:
            least_used_osd = None
            for one_osd in self.OSDs.values():
                # '<=' on purpose kept: among equally loaded OSDs the last one iterated is chosen
                if least_used_osd is None or \
                        one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
                        <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
                    least_used_osd = one_osd
            least_used_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id, least_used_osd.uuid))

        return osds_for_new_folders

    def update_folder(self, folder, size):
        """
        updates the size of a given folder

        folder is a folder id (a key of the folders dict of an OSD). every OSD that lists this id is
        asked to update it; if no OSD lists it, nothing happens.
        """
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders:
                one_osd.update_folder(folder, size)

    def description(self):
        """
        generates a string describing this data distribution

        for each OSD there is one line with str(OSD) and one line with its folders, followed by a
        final line with the average folder size.
        """
        lines = []
        for one_osd in self.OSDs.values():
            lines.append(str(one_osd))
            lines.append("folders : " + str(one_osd.folders))
        lines.append("average folder size: " + str(self.get_average_folder_size()))
        return "\n".join(lines)

    def __str__(self):
        """
        short representation: the number of OSDs followed by one line per OSD.
        """
        header = "DataDistribution has " + str(len(self.OSDs)) + " osds: \n"
        return header + "".join(str(one_osd) + " \n" for one_osd in self.OSDs.values())