# dataDistribution.py

import random

import osd


class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders, i.e.,
    their physical location.

    this class also allows to calculate a 'good' osd for new data, based on the
    distribution known beforehand.

    the OSDs are kept in self.OSDs, a dictionary mapping osd uuid -> osd object.
    the osd objects are expected to provide the attributes uuid, folders (a mapping
    folder id -> folder size) and totalFolderSize, as well as the methods
    contains_folder, add_folder, remove_folder and update_folder.
    """

    def __init__(self):
        self.OSDs = {}

    def add_new_osd(self, osd_uuid):
        """
        creates a new (empty) osd object for the given uuid and registers it.
        if the uuid is already known, a message is printed and nothing is changed.
        """
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
            return
        self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def add_osd(self, new_osd):
        """
        registers an already existing osd object under its uuid.
        if the uuid is already known, a message is printed and nothing is changed.
        """
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
            return
        self.OSDs[new_osd.uuid] = new_osd

    def add_osd_list(self, osd_list):
        """
        creates and registers a new (empty) osd object for each uuid in osd_list.
        uuids that are already known are silently skipped.
        """
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def get_osd_list(self):
        """
        returns a new list containing the uuids of all registered OSDs.
        """
        return list(self.OSDs)

    def get_containing_osd(self, folder_id):
        """
        returns the first osd object that contains the given folder,
        or None if no registered osd contains it.
        """
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
        return None

    def get_average_folder_size(self):
        """
        returns the total folder size of all OSDs divided by the total number of folders,
        or 0 if there are no folders at all.
        """
        total_size = 0
        total_number_of_folders = 0
        for one_osd in self.OSDs.values():
            total_size += one_osd.totalFolderSize
            total_number_of_folders += len(one_osd.folders)
        if total_number_of_folders == 0:
            return 0
        return total_size / total_number_of_folders

    def assign_new_osd(self, folder_id, new_osd):
        """
        assigns the given folder to the osd with uuid new_osd.

        if the folder is not yet part of the distribution, it is added to the new osd
        with the current average folder size.
        if the folder is located on another osd, it is moved (with its size) to the new osd.
        if the folder is already located on the new osd, nothing is changed.

        raises a KeyError if new_osd is not the uuid of a registered osd.
        """
        old_osd = self.get_containing_osd(folder_id)
        target_osd = self.OSDs[new_osd]

        if old_osd is None:
            target_osd.add_folder(folder_id, self.get_average_folder_size())
            return

        # the folder already is where it should be. adding and then removing it on the
        # very same osd would drop the folder from the distribution.
        if old_osd is target_osd:
            return

        target_osd.add_folder(folder_id, old_osd.folders[folder_id])
        old_osd.remove_folder(folder_id)

    def add_folders(self, folders, osd_information=None, ratio_parameter='', random_osd_assignment=False):
        """
        adds a list of folders to the data distribution.
        returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.
        each assignment is a tuple (folder id, osd uuid).

        each folder needs to provide the attributes id and size.
        folders that are already located on some osd are passed to add_folder of that osd
        and do not show up in the returned list.

        if the optional arguments are given, OSDs are assigned data proportionally to their ratio_parameter.
        in this case, osd_information maps osd uuid -> mapping containing the key ratio_parameter;
        it has to contain an entry for each registered osd.

        if random_osd_assignment is set, each new folder is put on a randomly chosen osd instead.

        if there are new folders but no OSDs are registered, the random assignment fails with an
        IndexError (from random.choice) and the balanced assignment with an AttributeError.
        """
        new_folders = []
        for folder in folders:
            containing_osd = self.get_containing_osd(folder.id)
            if containing_osd is not None:
                containing_osd.add_folder(folder.id, folder.size)
            else:
                new_folders.append(folder)

        print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))

        osds_for_new_folders = []

        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
        if random_osd_assignment:
            print("using random osd assignment")
            # the set of OSDs does not change while assigning, so the list is built only once
            all_osds = list(self.OSDs.values())
            for folder in new_folders:
                random_osd = random.choice(all_osds)
                random_osd.add_folder(folder.id, folder.size)
                osds_for_new_folders.append((folder.id, random_osd.uuid))
            return osds_for_new_folders

        # balanced deterministic OSD assignment
        # (following largest processing time first or post-greedy approach)
        new_folders.sort(key=lambda x: x.size, reverse=True)

        osd_ratios = {}
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = 0
            for osd_size in osd_information.values():
                total_osd_size += osd_size[ratio_parameter]
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)
        else:
            # without ratio information, all OSDs are weighted equally
            for osd_uuid in self.OSDs:
                osd_ratios[osd_uuid] = 1.0

        for folder in new_folders:
            # pick the osd with the smallest load relative to its ratio.
            # because of '<=', the last osd (in iteration order) wins in case of a tie.
            least_used_osd = None
            least_used_load = None
            for one_osd in self.OSDs.values():
                load = one_osd.totalFolderSize / osd_ratios[one_osd.uuid]
                if least_used_osd is None or load <= least_used_load:
                    least_used_osd = one_osd
                    least_used_load = load
            least_used_osd.add_folder(folder.id, folder.size)
            osds_for_new_folders.append((folder.id, least_used_osd.uuid))

        return osds_for_new_folders

    def update_folder(self, folder, size):
        """
        updates the size of a given folder
        (on each osd that contains the folder; unknown folders are ignored)
        """
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders:
                one_osd.update_folder(folder, size)

    def description(self):
        """
        generates a string describing this data distribution
        (one line per osd, one line with the folders of that osd, and the average folder size)
        """
        parts = []
        for one_osd in self.OSDs.values():
            parts.append(str(one_osd) + "\n")
            parts.append("folders : " + str(one_osd.folders) + "\n")
        parts.append("average folder size: " + str(self.get_average_folder_size()))
        return "".join(parts)

    def __str__(self):
        """
        returns the number of OSDs, followed by one line per osd
        """
        header = "DataDistribution has " + str(len(self.OSDs)) + " osds: \n"
        return header + "".join(str(one_osd) + " \n" for one_osd in self.OSDs.values())