# dataDistribution.py
import xtreemfs_client.osd as osd

"""
Class to keep track of the OSD (object storage device) locations of different folders, i.e.,
their physical location.

This class also allows calculating a 'good' OSD for new data, based on the
distribution known beforehand.
"""


class DataDistribution(object):
    """Book-keeping of which OSD currently holds which folder.

    The distribution is stored in ``self.OSDs``, a dict mapping an OSD uuid to
    its OSD object. The OSD objects are expected to expose ``uuid``,
    ``folders`` (a mapping folder id -> size), ``totalFolderSize`` and the
    methods ``contains_folder``, ``add_folder``, ``remove_folder`` and
    ``update_folder``, since those are the members used below.
    """

    def __init__(self):
        # maps OSD uuid -> OSD object
        self.OSDs = {}

    def add_new_osd(self, osd_uuid):
        """Create an empty OSD for ``osd_uuid`` and register it.

        If the uuid is already registered, a message is printed and the
        existing entry is left untouched.
        """
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
            return
        self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def add_osd(self, new_osd):
        """Register an already constructed OSD object under its ``uuid``.

        If the uuid is already registered, a message is printed and the
        existing entry is left untouched.
        """
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
            return
        self.OSDs[new_osd.uuid] = new_osd

    def add_osd_list(self, osd_list):
        """Register an empty OSD for every uuid in ``osd_list`` that is unknown.

        Already registered uuids are skipped silently.
        """
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def get_osd_list(self):
        """Return a new list with the uuids of all registered OSDs."""
        return list(self.OSDs)

    def get_containing_osd(self, folder_id):
        """Return the OSD object holding ``folder_id``, or None if no OSD does."""
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
        return None

    def get_average_folder_size(self):
        """Return the total folder size divided by the number of folders.

        Returns 0 when no folder is registered on any OSD.
        """
        total_size = 0
        total_number_of_folders = 0
        for one_osd in self.OSDs.values():
            total_size += one_osd.totalFolderSize
            total_number_of_folders += len(one_osd.folders)

        if total_number_of_folders == 0:
            return 0
        return total_size / total_number_of_folders

    def assign_new_osd(self, folder_id, new_osd):
        """Assign ``folder_id`` to the OSD registered under the uuid ``new_osd``.

        A folder that is not known yet is added with the current average
        folder size. A known folder is moved with its recorded size from its
        current OSD to the new one.

        Raises:
            KeyError: if ``new_osd`` is not a registered uuid and the folder
                actually has to be added or moved.
        """
        old_osd = self.get_containing_osd(folder_id)
        if old_osd is None:
            self.OSDs[new_osd].add_folder(folder_id, self.get_average_folder_size())
            return

        if old_osd.uuid == new_osd:
            # The folder already lives on the requested OSD. Adding and then
            # removing it on the same object would drop the folder from the
            # distribution, so there is nothing to do.
            return

        target_osd = self.OSDs[new_osd]
        target_osd.add_folder(folder_id, old_osd.folders[folder_id])
        old_osd.remove_folder(folder_id)

    def add_folders(self, folders, osd_information=None, ratio_parameter=''):
        """Add a list of folders to the data distribution.

        Folders that already live on an OSD are passed to that OSD's
        ``add_folder`` again with their current size. All other folders are
        placed, largest first, on the OSD with the smallest
        ``totalFolderSize / ratio`` at that moment (on a tie, the OSD that
        comes last in iteration order wins).

        Args:
            folders: iterable of objects with ``id`` and ``size`` attributes.
            osd_information: optional mapping OSD uuid -> mapping that can be
                indexed with ``ratio_parameter``.
            ratio_parameter: key into each ``osd_information`` value. When
                both optional arguments are given, each OSD's ratio is its
                value divided by the sum of all values; otherwise every
                registered OSD gets ratio 1. The values must be positive and
                must cover every registered OSD, since they are used as
                divisors and looked up by uuid.

        Returns:
            A list of ``(folder id, OSD uuid)`` tuples for the folders that
            had no assignment before this call.
        """
        new_folders = []
        for folder in folders:
            containing_osd = self.get_containing_osd(folder.id)
            if containing_osd is not None:
                containing_osd.add_folder(folder.id, folder.size)
            else:
                new_folders.append(folder)

        # Place big folders first so that small ones can even out the load.
        new_folders.sort(key=lambda x: x.size, reverse=True)

        if osd_information is not None and ratio_parameter != '':
            total_osd_size = sum(osd_size[ratio_parameter]
                                 for osd_size in osd_information.values())
            osd_ratios = {}
            for osd_uuid, osd_size in osd_information.items():
                osd_ratios[osd_uuid] = float(osd_size[ratio_parameter]) / float(total_osd_size)
        else:
            osd_ratios = dict.fromkeys(self.OSDs, float(1))

        def relative_load(one_osd):
            # Load of an OSD weighted by its share of the total capacity.
            return one_osd.totalFolderSize / osd_ratios[one_osd.uuid]

        osds_for_new_folders = []
        for folder in new_folders:
            least_used_osd = None
            for one_osd in self.OSDs.values():
                # '<=' is deliberate: on equal load the later OSD is chosen.
                if least_used_osd is None \
                        or relative_load(one_osd) <= relative_load(least_used_osd):
                    least_used_osd = one_osd
            least_used_osd.add_folder(folder.id, folder.size)
            osds_for_new_folders.append((folder.id, least_used_osd.uuid))

        return osds_for_new_folders

    def update_folder(self, folder, size):
        """Update the size of a given folder on every OSD that lists it."""
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders:
                one_osd.update_folder(folder, size)

    def description(self):
        """Generate a string describing this data distribution.

        For every OSD the string contains ``str(osd)`` and its folders, each
        on a line of its own, followed by the average folder size.
        """
        parts = []
        for one_osd in self.OSDs.values():
            parts.append(str(one_osd))
            parts.append("\n")
            parts.append("folders : " + str(one_osd.folders))
            parts.append("\n")
        parts.append("average folder size: " + str(self.get_average_folder_size()))
        return "".join(parts)

    def __str__(self):
        """Return the number of OSDs followed by one line per OSD."""
        header = "DataDistribution has " + str(len(self.OSDs)) \
                 + " osds: \n"
        return header + "".join(str(value) + " \n" for value in self.OSDs.values())