dataDistribution.py 3.57 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import osd

"""
Class to keep track of the OSD (object storage device) locations of different
satellite images, i.e., their physical location.
The images are abstracted to the coordinates of their center.
This class also allows calculating a 'good' OSD for new data, based on the
distribution known beforehand.
"""


class DataDistribution(object):
    """Track which OSD (object storage device) holds which folder.

    OSD objects are stored by uuid in ``self.osds``. Folders that are not yet
    held by any registered OSD can be placed with ``addFolders``, which puts
    each one on the OSD whose ``totalFolderSize`` is currently smallest.
    """

    def __init__(self):
        # Maps OSD uuid -> OSD object. A dict rather than a set, because the
        # OSD objects themselves are needed for every folder/size query.
        self.osds = {}

    def addNewOSD(self, osdUuid):
        """Create an OSD for ``osdUuid`` and register it.

        If the uuid is already registered, a notice is printed and the
        existing entry is left untouched.
        """
        if osdUuid in self.osds:
            print("key: " + osdUuid + " is already present!")
            return
        self.osds[osdUuid] = osd.OSD(osdUuid)

    def addOSD(self, osd):
        """Register an existing OSD object under its ``uuid``.

        If the uuid is already registered, a notice is printed and the
        existing entry is left untouched.
        """
        # NOTE: the parameter name shadows the imported ``osd`` module inside
        # this method; it is kept because it is part of the public signature.
        if osd.uuid in self.osds:
            print("key: " + osd.uuid + " is already present!")
            return
        self.osds[osd.uuid] = osd

    def addOSDList(self, osdList):
        """Create and register an OSD for every uuid in ``osdList``.

        Uuids that are already registered are skipped silently.
        """
        for osdUuid in osdList:
            if osdUuid not in self.osds:
                self.osds[osdUuid] = osd.OSD(osdUuid)

    def get_osd_list(self):
        """Return a new list with the uuids of all registered OSDs."""
        return list(self.osds)

    def getContainingOSD(self, folder_id):
        """Return the first OSD that reports containing ``folder_id``.

        Returns None if no registered OSD contains the folder.
        """
        for device in self.osds.values():
            if device.contains_folder(folder_id):
                return device
        return None

    def getAverageFolderSize(self):
        """Return the total folder size divided by the number of folders.

        Both values are summed over all registered OSDs. Returns 0 if there
        are no folders at all.
        """
        devices = list(self.osds.values())
        total_size = sum(device.totalFolderSize for device in devices)
        folder_count = sum(len(device.folders) for device in devices)
        if folder_count == 0:
            return 0
        return total_size / folder_count

    def _least_used_osd(self):
        """Return the OSD with the smallest ``totalFolderSize``.

        Returns None if no OSD is registered. On a tie, the OSD that comes
        last in the iteration order of ``self.osds`` wins (hence ``<=``).
        """
        least_used = None
        for device in self.osds.values():
            if least_used is None or \
                    device.totalFolderSize <= least_used.totalFolderSize:
                least_used = device
        return least_used

    def addFolders(self, folders):
        """Add a list of folders to the data distribution.

        A folder that some OSD already contains is passed to that OSD's
        ``add_folder`` again with its current size. The remaining folders are
        assigned largest-first, each one to the OSD that is least used at
        that moment.

        Returns a list of ``(folder id, OSD uuid)`` tuples for the folders
        that had no previous assignment, in assignment order.

        Raises ValueError if there are unassigned folders but no OSD is
        registered to take them.
        """
        new_folders = []
        for folder in folders:
            containing_osd = self.getContainingOSD(folder.id)
            if containing_osd is not None:
                containing_osd.add_folder(folder.id, folder.size)
            else:
                new_folders.append(folder)

        # Without any OSD there is nowhere to put new folders; fail with a
        # clear message instead of an AttributeError on None further down.
        if new_folders and not self.osds:
            raise ValueError("cannot assign folders: no OSDs registered")

        # Largest folders first, so that the big ones are spread out before
        # the small ones fill the gaps.
        new_folders.sort(key=lambda folder: folder.size, reverse=True)

        osds_for_new_folders = []
        for folder in new_folders:
            # Re-evaluated for every folder because each assignment changes
            # the sizes the choice is based on.
            target = self._least_used_osd()
            target.add_folder(folder.id, folder.size)
            osds_for_new_folders.append((folder.id, target.uuid))

        return osds_for_new_folders

    def update_folder(self, folder, size):
        """Update the size of ``folder`` on every OSD that lists it."""
        for device in self.osds.values():
            if folder in device.folders:
                device.update_folder(folder, size)

    def description(self):
        """Generate a string describing this data distribution.

        The string holds one line per OSD followed by a line with its
        folders, and ends with the average folder size.
        """
        parts = []
        for device in self.osds.values():
            parts.append(str(device))
            parts.append("\n")
            parts.append("folders : " + str(device.folders))
            parts.append("\n")
        parts.append("average folder size: " + str(self.getAverageFolderSize()))
        return "".join(parts)

    def __str__(self):
        """Return the number of OSDs followed by one line per OSD."""
        header = "DataDistribution has " + str(len(self.osds)) + " osds: \n"
        lines = [str(device) + " \n" for device in self.osds.values()]
        return header + "".join(lines)