dataDistribution.py 7.91 KB
Newer Older
1
2
import random

Felix Seibert's avatar
Felix Seibert committed
3
from xtreemfs_client import osd
4

5

Felix Seibert's avatar
Felix Seibert committed
6
7
8
9
class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.

    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder
    gets mapped to one OSD).

    the OSD objects handled here are expected to provide the attributes uuid, folders and total_folder_size
    as well as the methods contains_folder, add_folder, remove_folder and update_folder.
    """

    def __init__(self):
        # maps OSD uuid -> OSD object
        self.OSDs = {}

    def add_new_osd(self, osd_uuid):
        """
        create a new empty osd and add it to the existing OSDs.

        if an OSD with the given uuid is already known, a message is printed and nothing is changed.
        """
        if osd_uuid in self.OSDs:
            print("key: " + str(osd_uuid) + " is already present!")
            return
        self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def add_osd(self, new_osd):
        """
        add the given OSD (object) to the existing OSDs, using its uuid as key.

        if an OSD with the same uuid is already known, a message is printed and nothing is changed.
        """
        if new_osd.uuid in self.OSDs:
            print("key: " + str(new_osd.uuid) + " is already present!")
            return
        self.OSDs[new_osd.uuid] = new_osd

    def add_osd_list(self, osd_list):
        """
        create a new empty OSD for each uuid in osd_list that is not yet known.

        uuids that are already present are silently skipped.
        """
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def get_osd_list(self):
        """
        get a list of all existing OSD uuids.
        """
        return list(self.OSDs)

    def get_containing_osd(self, folder_id):
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
        return None

    def get_average_folder_size(self):
        """
        get the average folder size of all folders of all OSDs.

        returns 0 if there are no folders at all.
        """
        all_osds = list(self.OSDs.values())
        total_number_of_folders = sum(len(one_osd.folders) for one_osd in all_osds)
        if total_number_of_folders == 0:
            return 0
        total_size = sum(one_osd.total_folder_size for one_osd in all_osds)
        return total_size / total_number_of_folders

    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd. if folder_id already is assigned to an OSD, this old assignment is deleted.

        new_osd is the uuid (key in self.OSDs) of the target OSD; an unknown uuid raises a KeyError.
        a folder that was not assigned before is added with the current average folder size,
        otherwise it keeps the size recorded on its old OSD.
        """
        old_osd = self.get_containing_osd(folder_id)
        target_osd = self.OSDs[new_osd]
        if old_osd is None:
            target_osd.add_folder(folder_id, self.get_average_folder_size())
            return
        if old_osd is target_osd:
            # the folder already lives on the target; adding and then removing it on the
            # same OSD object would drop the assignment altogether.
            return
        target_osd.add_folder(folder_id, old_osd.folders[folder_id])
        old_osd.remove_folder(folder_id)

    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
                    ignore_folder_sizes=False,
                    debug=False):
        """
        adds a list of folders to the data distribution.
        if not specified otherwise, the assignments are calculated using the LPT algorithm.

        returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.
        each assignment is a tuple (folder id, osd uuid).

        the assignment is stable (i.e., folders already assigned to an OSD are not reassigned to another OSD).
        for such folders, add_folder is called on their current OSD with the given size.

        parameters:
        folders: iterable of objects with the attributes id and size.
        osd_information: optional dict, osd uuid -> dict of properties of that OSD.
        ratio_parameter: key into the per-OSD property dict. if given together with osd_information, the LPT
            assignment distributes data proportionally to this value; otherwise all OSDs are weighted equally.
        capacity: key into the per-OSD property dict holding the capacity of the OSD. only used for random
            assignment with ignore_osd_capacities=False.
        ignore_osd_capacities: only evaluated if random_osd_assignment is set.
        random_osd_assignment: choose a random OSD for each new folder instead of using LPT.
        ignore_folder_sizes: currently not used by this method.
        debug: print some information about the chosen strategy.

        raises:
        ValueError: if random_osd_assignment is set and ignore_osd_capacities is False, but osd_information
            or capacity is not given.
        IndexError: if a random assignment is requested and there is no (suitable) OSD to choose from.
        """
        respect_capacities = random_osd_assignment and not ignore_osd_capacities
        # validate before touching any OSD, so that an invalid call does not leave partial updates behind.
        # the capacity-respecting branch reads osd_information[uuid][capacity], hence these two are required.
        if respect_capacities and (osd_information is None or capacity == ''):
            raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is "
                             "not given!")

        new_folders = []
        for folder in folders:
            containing_osd = self.get_containing_osd(folder.id)
            if containing_osd is None:
                new_folders.append(folder)
            else:
                containing_osd.add_folder(folder.id, folder.size)

        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))

        if respect_capacities:
            return self._assign_randomly_within_capacity(new_folders, osd_information, capacity, debug)
        if random_osd_assignment:
            return self._assign_randomly(new_folders, debug)
        return self._assign_balanced(new_folders, osd_information, ratio_parameter)

    def _assign_randomly(self, new_folders, debug):
        """
        totally random OSD assignment, even ignoring OSD capacities
        (might lead to I/O errors when too many groups are assigned to an OSD).
        """
        if debug:
            print("using totally random osd assignment")
        candidates = list(self.OSDs.values())
        assignments = []
        for folder in new_folders:
            random_osd = random.choice(candidates)
            random_osd.add_folder(folder.id, folder.size)
            assignments.append((folder.id, random_osd.uuid))
        return assignments

    def _assign_randomly_within_capacity(self, new_folders, osd_information, capacity, debug):
        """
        random OSD assignment respecting OSD capacities: each folder goes to a random OSD among those
        whose capacity still covers their current total folder size plus the size of the folder.
        """
        if debug:
            print("using random osd assignment, respecting osd capacities")
        assignments = []
        for folder in new_folders:
            # has to be recomputed for each folder, as every assignment changes the free capacity
            suitable_osds = [one_osd for one_osd in self.OSDs.values()
                             if osd_information[one_osd.uuid][capacity]
                             - one_osd.total_folder_size - folder.size >= 0]
            suitable_random_osd = random.choice(suitable_osds)
            suitable_random_osd.add_folder(folder.id, folder.size)
            assignments.append((folder.id, suitable_random_osd.uuid))
        return assignments

    def _compute_osd_ratios(self, osd_information, ratio_parameter):
        """
        compute the weight of each OSD: its share of the summed ratio_parameter values if
        osd_information and ratio_parameter are given, 1.0 for every known OSD otherwise.
        """
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = sum(osd_size[ratio_parameter] for osd_size in osd_information.values())
            return {osd_uuid: float(osd_size[ratio_parameter]) / float(total_osd_size)
                    for osd_uuid, osd_size in osd_information.items()}
        return {osd_uuid: float(1) for osd_uuid in self.OSDs}

    def _assign_balanced(self, new_folders, osd_information, ratio_parameter):
        """
        balanced deterministic OSD assignment
        (following largest processing time first or post-greedy approach):
        folders are handled in order of decreasing size, each one goes to the OSD with the
        smallest total folder size relative to its weight.
        """
        new_folders.sort(key=lambda folder: folder.size, reverse=True)
        osd_ratios = self._compute_osd_ratios(osd_information, ratio_parameter)

        assignments = []
        for folder in new_folders:
            least_used_osd = None
            for one_osd in self.OSDs.values():
                # '<=' on purpose: among equally loaded OSDs the last one visited is chosen
                if least_used_osd is None \
                        or one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
                        <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
                    least_used_osd = one_osd
            least_used_osd.add_folder(folder.id, folder.size)
            assignments.append((folder.id, least_used_osd.uuid))
        return assignments

    def update_folder(self, folder, size):
        """
        updates the size of a given folder (id) on every OSD that holds it.
        """
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders:
                one_osd.update_folder(folder, size)

    def description(self):
        """
        generates a string describing this data distribution:
        each OSD with its folders, followed by the average folder size.
        """
        parts = []
        for one_osd in self.OSDs.values():
            parts.append(str(one_osd) + "\n")
            parts.append("folders : " + str(one_osd.folders) + "\n")
        parts.append("average folder size: " + str(self.get_average_folder_size()))
        return "".join(parts)

    def __str__(self):
        """
        short string representation: the number of OSDs followed by one line per OSD.
        """
        header = "DataDistribution has " + str(len(self.OSDs)) + " osds: \n"
        return header + "".join(str(one_osd) + " \n" for one_osd in self.OSDs.values())