import random

from xtreemfs_client import osd
from xtreemfs_client import folder


class DataDistribution(object):
    """
    class to keep track of the osd (object storage device) locations of different folders,
    i.e., their physical location.

    this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs
    (each folder gets mapped to one OSD).

    the state is a single dict, self.OSDs, mapping an OSD uuid to its osd.OSD object.
    """

    def __init__(self):
        # maps OSD uuid -> osd.OSD object
        self.OSDs = {}

    def add_new_osd(self, osd_uuid):
        """
        create a new empty osd and add it to the existing OSDs.
        if an OSD with the given uuid is already known, a message is printed and nothing is changed.
        """
        if osd_uuid in self.OSDs:
            print("key: " + osd_uuid + " is already present!")
            return
        self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def add_osd(self, new_osd):
        """
        add the given OSD (object) to the existing OSDs.
        if an OSD with the same uuid is already known, a message is printed and nothing is changed.
        """
        if new_osd.uuid in self.OSDs:
            print("key: " + new_osd.uuid + " is already present!")
            return
        self.OSDs[new_osd.uuid] = new_osd

    def add_osd_list(self, osd_list):
        """
        add a new empty OSD for each uuid in the given list of OSD uuids.
        uuids that are already known are silently skipped.
        """
        for osd_uuid in osd_list:
            if osd_uuid not in self.OSDs:
                self.OSDs[osd_uuid] = osd.OSD(osd_uuid)

    def get_osd_list(self):
        """
        get a list of all existing OSD uuids.
        """
        return list(self.OSDs)

    def get_containing_osd(self, folder_id):
        """
        get the OSD containing the given folder_id, or None if the folder is not assigned to any OSD.
        """
        for checked_osd in self.OSDs.values():
            if checked_osd.contains_folder(folder_id):
                return checked_osd
        return None

    def get_average_folder_size(self):
        """
        get the average folder size of all folders of all OSDs.
        returns 0 if there are no folders at all.
        """
        total_number_of_folders = sum(len(one_osd.folders) for one_osd in self.OSDs.values())
        if total_number_of_folders == 0:
            return 0
        total_size = sum(one_osd.total_folder_size for one_osd in self.OSDs.values())
        return total_size / total_number_of_folders

    def assign_new_osd(self, folder_id, new_osd):
        """
        assign folder_id to new_osd (given as OSD uuid).
        if folder_id already is assigned to an OSD, this old assignment is deleted and the folder keeps
        its size. otherwise the folder is added with the current average folder size.
        if folder_id is already assigned to new_osd, nothing is changed.

        raises KeyError if new_osd is not a known OSD uuid.
        """
        # look up the target first, so that an unknown uuid fails before anything is modified
        target_osd = self.OSDs[new_osd]
        old_osd = self.get_containing_osd(folder_id)

        if old_osd is None:
            target_osd.add_folder(folder_id, self.get_average_folder_size())
            return

        # adding and then removing on the very same OSD would presumably drop the folder entirely,
        # so re-assigning a folder to the OSD it already lives on is a no-op.
        if old_osd is target_osd:
            return

        target_osd.add_folder(folder_id, old_osd.folders[folder_id])
        old_osd.remove_folder(folder_id)

    def add_folders(self, folders,
                    osd_information=None, ratio_parameter='', capacity='',
                    ignore_osd_capacities=True,
                    random_osd_assignment=False,
                    ignore_folder_sizes=False,
                    debug=False):
        """
        adds a list of folders to the data distribution.

        folders that are already assigned to an OSD stay on that OSD (the assignment is stable);
        add_folder is called on it again with the folder's given size.
        all other (new) folders are assigned according to the selected strategy:

        - random_osd_assignment=False (default): balanced deterministic assignment, following the LPT
          (largest processing time first) approach. if osd_information and ratio_parameter are given,
          OSDs are assigned data proportionally to osd_information[uuid][ratio_parameter].
        - random_osd_assignment=True, ignore_osd_capacities=True, ignore_folder_sizes=False:
          totally random assignment.
        - random_osd_assignment=True, ignore_osd_capacities=False:
          random assignment among the OSDs for which
          osd_information[uuid][capacity] still has room for the folder.
        - random_osd_assignment=True, ignore_osd_capacities=True, ignore_folder_sizes=True:
          all given folders are replaced by copies having the average folder size, shuffled,
          and passed to add_folders again with default arguments.

        if debug is True, the chosen strategy is printed.

        returns a list of (folder id, OSD uuid) tuples for the folders
        for which there was previously no assignment.

        raises ValueError if ignore_osd_capacities=False is requested for a random assignment
        without osd_information or capacity.
        raises IndexError (from random.choice) if a random assignment finds no candidate OSD.
        """
        new_folders = []
        for a_folder in folders:
            containing_osd = self.get_containing_osd(a_folder.id)
            if containing_osd is not None:
                containing_osd.add_folder(a_folder.id, a_folder.size)
            else:
                new_folders.append(a_folder)

        if debug:
            print("dataDistribution: random_osd_assignment: " + str(random_osd_assignment))

        # totally random OSD assignment, even ignoring OSD capacities
        # (might lead to I/O errors when too many groups are assigned to an OSD)
        if random_osd_assignment and ignore_osd_capacities and not ignore_folder_sizes:
            if debug:
                print("using totally random osd assignment")
            return self._assign_randomly(new_folders)

        # random OSD assignment respecting OSD capacities
        if random_osd_assignment and not ignore_osd_capacities:
            if osd_information is None or capacity == '':
                raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is "
                                 "not given!")
            if debug:
                print("using random osd assignment, respecting osd capacities")
            return self._assign_randomly_within_capacity(new_folders, osd_information, capacity)

        # random OSD assignment ignoring folder sizes
        if random_osd_assignment and ignore_folder_sizes:
            if debug:
                print("using random osd assignment ignoring folder sizes")
            average_folder_size = self.get_average_folder_size()
            if average_folder_size == 0:
                average_folder_size = 1
            # NOTE(review): this rebuilds from `folders`, not `new_folders`, so folders that are already
            # assigned are passed to add_folder once more with the average size - confirm this is intended.
            modified_folders = [folder.Folder(f.id, average_folder_size, f.origin) for f in folders]
            random.shuffle(modified_folders)
            return self.add_folders(modified_folders)

        # balanced deterministic OSD assignment
        # (following largest processing time first or post-greedy approach)
        return self._assign_balanced(new_folders, osd_information, ratio_parameter)

    def _assign_randomly(self, new_folders):
        """
        assign each folder to an OSD chosen uniformly at random; returns the (folder id, OSD uuid) list.
        """
        osds_for_new_folders = []
        for a_folder in new_folders:
            random_osd = random.choice(list(self.OSDs.values()))
            random_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id, random_osd.uuid))
        return osds_for_new_folders

    def _assign_randomly_within_capacity(self, new_folders, osd_information, capacity):
        """
        assign each folder to a random OSD with enough remaining capacity; returns the (folder id, OSD uuid) list.
        """
        osds_for_new_folders = []
        for a_folder in new_folders:
            # OSDs whose capacity still covers their current load plus this folder
            suitable_osds = [
                one_osd for one_osd in self.OSDs.values()
                if osd_information[one_osd.uuid][capacity] - one_osd.total_folder_size - a_folder.size >= 0
            ]
            suitable_random_osd = random.choice(suitable_osds)
            suitable_random_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id, suitable_random_osd.uuid))
        return osds_for_new_folders

    def _compute_osd_ratios(self, osd_information, ratio_parameter):
        """
        get the share of data each OSD should receive, as a dict from OSD uuid to weight.
        """
        if osd_information is not None and ratio_parameter != '':
            total_osd_size = sum(osd_size[ratio_parameter] for osd_size in osd_information.values())
            return {osd_uuid: float(osd_size[ratio_parameter]) / float(total_osd_size)
                    for osd_uuid, osd_size in osd_information.items()}
        # no usable information: all OSDs are weighted equally
        return {osd_uuid: float(1) for osd_uuid in self.OSDs}

    def _assign_balanced(self, new_folders, osd_information, ratio_parameter):
        """
        assign folders, largest first, to the OSD with the least weighted load; returns the (folder id, OSD uuid) list.
        note that new_folders is sorted in place.
        """
        new_folders.sort(key=lambda x: x.size, reverse=True)
        osd_ratios = self._compute_osd_ratios(osd_information, ratio_parameter)

        osds_for_new_folders = []
        for a_folder in new_folders:
            least_used_osd = None
            for one_osd in self.OSDs.values():
                # '<=' means that among equally loaded OSDs the last one in iteration order wins
                if (least_used_osd is None) or \
                        one_osd.total_folder_size / osd_ratios[one_osd.uuid] \
                        <= least_used_osd.total_folder_size / osd_ratios[least_used_osd.uuid]:
                    least_used_osd = one_osd
            least_used_osd.add_folder(a_folder.id, a_folder.size)
            osds_for_new_folders.append((a_folder.id, least_used_osd.uuid))
        return osds_for_new_folders

    def update_folder(self, folder, size):
        """
        updates the size of a given folder (given by its id) on every OSD that contains it.
        nothing happens if no OSD contains the folder.
        """
        for one_osd in self.OSDs.values():
            if folder in one_osd.folders:
                one_osd.update_folder(folder, size)

    def description(self):
        """
        generates a string describing this data distribution:
        each OSD with its folders, followed by the average folder size.
        """
        parts = []
        for one_osd in self.OSDs.values():
            parts.append(str(one_osd))
            parts.append("\n")
            parts.append("folders : " + str(one_osd.folders))
            parts.append("\n")
        parts.append("average folder size: " + str(self.get_average_folder_size()))
        return "".join(parts)

    def __str__(self):
        header = "DataDistribution has " + str(len(self.OSDs)) + " osds: \n"
        return header + "".join(str(value) + " \n" for value in self.OSDs.values())