Commit 144bb743 authored by Felix Seibert's avatar Felix Seibert
Browse files

refactor: bandwidths instead of capacities

parent be184d08
...@@ -34,14 +34,12 @@ class TestDataDistribution(unittest.TestCase): ...@@ -34,14 +34,12 @@ class TestDataDistribution(unittest.TestCase):
distribution_felix = dataDistribution.DataDistribution() distribution_felix = dataDistribution.DataDistribution()
distribution_felix.add_osd_list(create_test_osd_list(num_osds, osds_capacities)) distribution_felix.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
distribution_felix.add_folders(create_test_folder_list(num_folders, folder_sizes), distribution_felix.add_folders(create_test_folder_list(num_folders, folder_sizes),
osd_information=create_osd_information(num_osds, osds_capacities),
random_osd_assignment=True, random_osd_assignment=True,
ignore_osd_capacities=True) ignore_osd_capacities=True)
distribution_farouk = dataDistribution.DataDistribution() distribution_farouk = dataDistribution.DataDistribution()
distribution_farouk.add_osd_list(create_test_osd_list(num_osds, osds_capacities)) distribution_farouk.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
distribution_farouk.add_folders(create_test_folder_list(num_folders, folder_sizes), distribution_farouk.add_folders(create_test_folder_list(num_folders, folder_sizes),
osd_information=create_osd_information(num_osds, osds_capacities),
random_osd_assignment=True, random_osd_assignment=True,
ignore_osd_capacities=True) ignore_osd_capacities=True)
...@@ -64,17 +62,6 @@ class TestDataDistribution(unittest.TestCase): ...@@ -64,17 +62,6 @@ class TestDataDistribution(unittest.TestCase):
self.assertTrue(felix_and_farouk_different) self.assertTrue(felix_and_farouk_different)
self.assertTrue(max_osd_total_folder_size > osds_capacities[0]) self.assertTrue(max_osd_total_folder_size > osds_capacities[0])
def test_value_error(self):
    """add_folders must raise ValueError when asked to respect OSD
    capacities that cannot hold the folders: one folder of size 1 is
    offered to a single OSD of capacity 0.
    """
    distribution = dataDistribution.DataDistribution()
    distribution.add_osd_list(create_test_osd_list(1, [0]))
    # assertRaises is the idiomatic unittest way to pin an expected
    # exception; it replaces the manual try/except/self.fail dance.
    with self.assertRaises(ValueError):
        distribution.add_folders(create_test_folder_list(1, [1]),
                                 random_osd_assignment=True,
                                 ignore_osd_capacities=False)
def test_random_distribution_respecting_capacities(self): def test_random_distribution_respecting_capacities(self):
# generate some random distributions and check whether they all respect the OSD capacities # generate some random distributions and check whether they all respect the OSD capacities
num_osds = 3 num_osds = 3
...@@ -87,11 +74,11 @@ class TestDataDistribution(unittest.TestCase): ...@@ -87,11 +74,11 @@ class TestDataDistribution(unittest.TestCase):
for i in range(0, 100): for i in range(0, 100):
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osds_capacities))
distribution.set_osd_capacities(create_osd_information(num_osds, osds_capacities))
distribution.add_folders(create_test_folder_list(num_folders, folder_size), distribution.add_folders(create_test_folder_list(num_folders, folder_size),
random_osd_assignment=True, random_osd_assignment=True,
ignore_osd_capacities=False, ignore_osd_capacities=False)
osd_information=create_osd_information(num_osds, osds_capacities),
capacity=osd_capacity_key)
osds = distribution.get_osd_list() osds = distribution.get_osd_list()
total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds)) total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds))
...@@ -150,10 +137,13 @@ class TestDataDistribution(unittest.TestCase): ...@@ -150,10 +137,13 @@ class TestDataDistribution(unittest.TestCase):
self.assertTrue(min(total_folder_sizes) == max(total_folder_sizes)) self.assertTrue(min(total_folder_sizes) == max(total_folder_sizes))
# test 1 for differently-sized OSDs # test 1 for differently-sized OSDs
osd_capacities = [10, 20] osd_bandwidths_1 = [10, 20]
folder_sizes = [4, 4, 4] folder_sizes = [4, 4, 4, 4, 4, 4]
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_1))
distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_1))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes)) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
osds = distribution.get_osd_list() osds = distribution.get_osd_list()
total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds)) total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds))
...@@ -161,35 +151,35 @@ class TestDataDistribution(unittest.TestCase): ...@@ -161,35 +151,35 @@ class TestDataDistribution(unittest.TestCase):
# test 2 for differently-sized OSDs. the expected result is that the 4 large OSD receive 2 files each, # test 2 for differently-sized OSDs. the expected result is that the 4 large OSD receive 2 files each,
# while the 4 small OSDs receive no files. # while the 4 small OSDs receive no files.
osd_capacities = [10, 30] osd_bandwidths_2 = [10, 30]
folder_sizes = [1] folder_sizes = [1]
num_folders = 8 num_folders = 8
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths_2))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths_2))
osd_information=create_osd_information(num_osds, osd_capacities),
ratio_parameter=osd_capacity_key) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
osds = distribution.get_osd_list() osds = distribution.get_osd_list()
total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds)) total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds))
self.assertEqual(0, min(total_folder_sizes)) self.assertEqual(0, min(total_folder_sizes))
self.assertEqual(2, max(total_folder_sizes)) self.assertEqual(2, max(total_folder_sizes))
def test_average_osd_load(self): def test_average_osd_processing_time(self):
folder_sizes = [49, 123, 1, 7] folder_sizes = [48, 123, 1, 7]
num_folders = 2 num_folders = 2
num_osds = 4 num_osds = 4
osd_capacities = [100, 150] osd_bandwidths = [10, 15]
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))
create_osd_information(num_osds, osd_capacities),
osd_capacity_key)
average = (sum(folder_sizes) * num_folders) / (sum(osd_capacities * num_osds)) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes))
self.assertEqual(average, distribution.get_average_osd_load(create_osd_information(num_osds, osd_capacities),
osd_capacity_key)) average = 3.05
self.assertEqual(average, distribution.get_average_processing_time())
def test_average_total_folder_size(self): def test_average_total_folder_size(self):
folder_sizes = [49, 123, 1, 7] folder_sizes = [49, 123, 1, 7]
...@@ -203,7 +193,7 @@ class TestDataDistribution(unittest.TestCase): ...@@ -203,7 +193,7 @@ class TestDataDistribution(unittest.TestCase):
osd_capacity_key) osd_capacity_key)
average = (sum(folder_sizes) * num_folders) / (num_osds * len(osd_capacities)) average = (sum(folder_sizes) * num_folders) / (num_osds * len(osd_capacities))
self.assertEqual(average, distribution.get_average_total_folder_size()) self.assertEqual(average, distribution.get_average_load())
def test_rebalance_lpt(self): def test_rebalance_lpt(self):
folder_sizes = [1] folder_sizes = [1]
...@@ -213,6 +203,8 @@ class TestDataDistribution(unittest.TestCase): ...@@ -213,6 +203,8 @@ class TestDataDistribution(unittest.TestCase):
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities))
distribution.set_osd_capacities(create_osd_information(num_osds, osd_capacities))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
distribution.rebalance_lpt() distribution.rebalance_lpt()
...@@ -224,26 +216,30 @@ class TestDataDistribution(unittest.TestCase): ...@@ -224,26 +216,30 @@ class TestDataDistribution(unittest.TestCase):
def test_rebalance_one_folder(self): def test_rebalance_one_folder(self):
folder_sizes = [1] folder_sizes = [1]
num_folders = 8 num_folders = 8
osd_capacities = [10] osd_bandwidths = [10]
num_osds = 4 num_osds = 4
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
distribution.rebalance_one_folder() distribution.rebalance_one_folder()
osds = distribution.get_osd_list() osds = distribution.get_osd_list()
total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds)) total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds))
# we should obtain a perfectly balanced distribution # we should obtain a perfectly balanced distribution
self.assertEqual(min(total_folder_sizes), max(total_folder_sizes)) self.assertEqual(min(total_folder_sizes), max(total_folder_sizes))
osd_capacities = [10, 30] osd_bandwidths = [10, 30]
folder_sizes = [1] folder_sizes = [1]
num_folders = 8 num_folders = 8
distribution = dataDistribution.DataDistribution() distribution = dataDistribution.DataDistribution()
distribution.add_osd_list(create_test_osd_list(num_osds, osd_capacities)) distribution.add_osd_list(create_test_osd_list(num_osds, osd_bandwidths))
distribution.set_osd_bandwidths(create_osd_information(num_osds, osd_bandwidths))
distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True) distribution.add_folders(create_test_folder_list(num_folders, folder_sizes), random_osd_assignment=True)
distribution.rebalance_one_folder(osd_information=create_osd_information(num_osds, osd_capacities),
capacity=osd_capacity_key) distribution.rebalance_one_folder()
osds = distribution.get_osd_list() osds = distribution.get_osd_list()
total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds)) total_folder_sizes = list(map(lambda x: distribution.OSDs[x].total_folder_size, osds))
...@@ -274,8 +270,7 @@ def create_osd_information(num_osds, osd_capacities): ...@@ -274,8 +270,7 @@ def create_osd_information(num_osds, osd_capacities):
for i in range(0, num_osds): for i in range(0, num_osds):
for osd_capacity in osd_capacities: for osd_capacity in osd_capacities:
osd_uuid = create_osd_id(osd_capacity, i) osd_uuid = create_osd_id(osd_capacity, i)
osd_information[osd_uuid] = {} osd_information[osd_uuid] = osd_capacity
osd_information[osd_uuid][osd_capacity_key] = osd_capacity
return osd_information return osd_information
......
...@@ -11,12 +11,8 @@ class DataDistribution(object): ...@@ -11,12 +11,8 @@ class DataDistribution(object):
this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder this class also allows to calculate several data distributions, e.g., mappings from folders to OSDs (each folder
gets mapped to one OSD). gets mapped to one OSD).
the load is defined as the quotient from the total_folder_size of an OSD divided by its capacity.
""" """
# TODO introduce consistent handling of (missing) OSD capacities / osd_information
def __init__(self): def __init__(self):
self.OSDs = {} self.OSDs = {}
...@@ -48,6 +44,25 @@ class DataDistribution(object): ...@@ -48,6 +44,25 @@ class DataDistribution(object):
new_osd = osd.OSD(osd_uuid) new_osd = osd.OSD(osd_uuid)
self.OSDs[osd_uuid] = new_osd self.OSDs[osd_uuid] = new_osd
def set_osd_capacities(self, osd_capacities):
    """
    set osd capacities
    :param osd_capacities: map from osd uuids to osd capacities (int values)
    :return: None
    """
    for one_osd in self.OSDs.values():
        capacity = osd_capacities[one_osd.uuid]
        # isinstance is the idiomatic type check; `type(x) is int` also
        # rejects int subclasses. bool is excluded explicitly because it
        # subclasses int and the original check rejected it as well.
        assert isinstance(capacity, int) and not isinstance(capacity, bool)
        one_osd.capacity = capacity
def set_osd_bandwidths(self, osd_bandwidths):
    """
    Assign a bandwidth to every known OSD.

    :param osd_bandwidths: map from osd uuids to osd bandwidths
    :return: None
    """
    # each OSD is stored under its own uuid, so the dict key doubles
    # as the lookup key into the bandwidth map
    for uuid, current_osd in self.OSDs.items():
        current_osd.bandwidth = osd_bandwidths[uuid]
def get_osd_list(self): def get_osd_list(self):
""" """
get a list of all existing OSD uuids. get a list of all existing OSD uuids.
...@@ -90,47 +105,52 @@ class DataDistribution(object): ...@@ -90,47 +105,52 @@ class DataDistribution(object):
return 0 return 0
return total_size / total_number_of_folders return total_size / total_number_of_folders
def get_average_osd_load(self, osd_information, capacity): def get_average_load(self):
""" """
calculate the average OSD load, that is, calculate the average OSD load, that is, the average of their total_folder_size.
the ratio between the sum of all folder sizes and the total OSD capacity.
""" """
total_folder_size = 0 total_folder_size = 0
total_osd_capacity = 0 for osd in self.OSDs.values():
for osd_uuid in self.OSDs.keys(): total_folder_size += osd.get_load()
total_folder_size += self.OSDs[osd_uuid].total_folder_size return total_folder_size / len(self.OSDs)
total_osd_capacity += osd_information[osd_uuid][capacity]
return total_folder_size / total_osd_capacity
def get_maximum_osd_load(self, osd_information, capacity): def get_maximum_load(self):
""" """
calculate the maximum OSD load. calculate the maximum OSD load, that is, the maximum of their total_folder_size.
""" """
assert osd_information is not None
assert capacity != ''
maximum_load = 0 maximum_load = 0
maximum_osd = None maximum_osd = None
for osd in self.OSDs.values(): for osd in self.OSDs.values():
load = osd.total_folder_size / osd_information[osd.uuid][capacity] load = osd.total_folder_size
if maximum_osd is None or load > maximum_load: if maximum_osd is None or load > maximum_load:
maximum_load = load maximum_load = load
maximum_osd = osd maximum_osd = osd
return maximum_osd, maximum_load return maximum_osd, maximum_load
def get_average_total_folder_size(self): def get_average_processing_time(self):
""" """
calculate the average total_folder_size of the OSDs. calculate the average OSD processing time, that is, the average of their (total_folder_size / bandwidth).
:return:
""" """
total_folder_size = 0 total_processing_time = 0
num_osds = 0
for osd in self.OSDs.values(): for osd in self.OSDs.values():
total_folder_size += osd.total_folder_size total_processing_time += osd.get_processing_time()
num_osds += 1 return total_processing_time / len(self.OSDs)
return total_folder_size / num_osds def get_maximum_processing_time(self):
"""
calculate the maximum OSD processing time, also known as makespan
"""
maximum_processing_time = 0
maximum_osd = None
for osd in self.OSDs.values():
processing_time = osd.get_processing_time()
if maximum_osd is None or processing_time > maximum_processing_time:
maximum_processing_time = processing_time
maximum_osd = osd
return maximum_osd, maximum_processing_time
def add_folders(self, folders, def add_folders(self, folders,
osd_information=None, ratio_parameter='', capacity='',
ignore_osd_capacities=True, ignore_osd_capacities=True,
random_osd_assignment=False, random_osd_assignment=False,
ignore_folder_sizes=False, ignore_folder_sizes=False,
...@@ -140,17 +160,8 @@ class DataDistribution(object): ...@@ -140,17 +160,8 @@ class DataDistribution(object):
if not specified otherwise, the assignments are calculated using the LPT algorithm. if not specified otherwise, the assignments are calculated using the LPT algorithm.
returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment. returns a list of assignments from folders to OSDs, for which (folders) there was previously no assignment.
if osd_information and ratio_parameter are given, if capacities and bandwidths are set for the OSDs, folders are assigned accordingly
OSDs are assigned data proportionally to their ratio_parameter. (capacities are respected and OSDs with higher bandwidth obtain more/larger files).
osd_information is a map (that we now call outer map) that contains, for each OSD, an inner_map.
outer_map[osd_uuid][ratio_parameter] is used to calculate the proportion of data assigned to OSD with uuid
osd_uuid.
if ignore_osd_capacities=False,
outer_map[osd_uuid][capacity] is used (only in combination with random_osd_assignment=True)
to generate random assignments that do not surpass the capacities of the OSDs.
(random assignment respecting OSD capacities)
if random_osd_assignment=True and ignore_osd_capacities=True, a totally random OSD assignment generated. if random_osd_assignment=True and ignore_osd_capacities=True, a totally random OSD assignment generated.
...@@ -190,15 +201,12 @@ class DataDistribution(object): ...@@ -190,15 +201,12 @@ class DataDistribution(object):
# random OSD assignment respecting OSD capacities # random OSD assignment respecting OSD capacities
elif random_osd_assignment and not ignore_osd_capacities: elif random_osd_assignment and not ignore_osd_capacities:
if osd_information is None or capacity == '':
raise ValueError("ignore_osd_capacities=False is not possible if osd_information or capacity is"
"not given!")
if debug: if debug:
print("using random osd assignment, respecting osd capacities") print("using random osd assignment, respecting osd capacities")
for a_folder in new_folders: for a_folder in new_folders:
suitable_osds = [] # list of OSDs with enough capacity suitable_osds = [] # list of OSDs with enough capacity
for one_osd in self.OSDs.values(): for one_osd in self.OSDs.values():
if osd_information[one_osd.uuid][capacity] - one_osd.total_folder_size - a_folder.size >= 0: if one_osd.capacity - one_osd.total_folder_size - a_folder.size >= 0:
suitable_osds.append(one_osd) suitable_osds.append(one_osd)
suitable_random_osd = random.choice(suitable_osds) suitable_random_osd = random.choice(suitable_osds)
suitable_random_osd.add_folder(a_folder.id, a_folder.size) suitable_random_osd.add_folder(a_folder.id, a_folder.size)
...@@ -206,7 +214,7 @@ class DataDistribution(object): ...@@ -206,7 +214,7 @@ class DataDistribution(object):
suitable_random_osd.uuid)) suitable_random_osd.uuid))
return osds_for_new_folders return osds_for_new_folders
# random OSD assignment ignoring folder sizes # random OSD assignment ignoring folder sizes // round-robin style distribution with some randomness
elif random_osd_assignment and ignore_folder_sizes: elif random_osd_assignment and ignore_folder_sizes:
if debug: if debug:
print("using random osd assignment ignoring folder sizes") print("using random osd assignment ignoring folder sizes")
...@@ -223,57 +231,56 @@ class DataDistribution(object): ...@@ -223,57 +231,56 @@ class DataDistribution(object):
# (following largest processing time first, also called post-greedy approach) # (following largest processing time first, also called post-greedy approach)
list.sort(new_folders, key=lambda x: x.size, reverse=True) list.sort(new_folders, key=lambda x: x.size, reverse=True)
# if osd_information is None, use the fake osd_information, which assumes that all OSDs have the same capacity
# otherwise use the given osd_information
if osd_information is None:
ratio_parameter = 'dummy_value'
osd_information = self.get_equal_sized_fake_osd_information(ratio_parameter)
# for each folder calculate the best OSD and add it to it # for each folder calculate the best OSD and add it to it
for a_folder in new_folders: for a_folder in new_folders:
least_used_osd, _ = self.get_lpt_osd(osd_information, ratio_parameter, a_folder.size) least_used_osd, _ = self.get_lpt_osd(a_folder.size)
least_used_osd.add_folder(a_folder.id, a_folder.size) least_used_osd.add_folder(a_folder.id, a_folder.size)
osds_for_new_folders.append((a_folder.id, osds_for_new_folders.append((a_folder.id,
least_used_osd.uuid)) least_used_osd.uuid))
return osds_for_new_folders return osds_for_new_folders
def rebalance_lpt(self, rebalance_factor=1, osd_information=None, capacity=''): def rebalance_lpt(self, rebalance_factor=1):
""" """
rebalance folders to OSDs by assigning folders to new OSDs using the following strategy: rebalance folders to OSDs by assigning folders to new OSDs using the following strategy:
1. 'unroll' the assignment. this means that, for each OSD, folders are removed until the OSD has less 1. 'unroll' the assignment. this means that, for each OSD, folders are removed until the OSD has less
total_folder_size than the average total folder size of this distribution multiplied by rebalance_factor. processing time than the average processing time of this distribution multiplied by rebalance_factor.
2. reassign the removed folders using the LPT strategy. 2. reassign the removed folders using the LPT strategy.
""" """
movements = {} movements = {}
folders_to_be_reassigned = [] folders_to_be_reassigned = []
reassignment_factor = self.get_average_osd_load(osd_information, capacity) * rebalance_factor # TODO reassignment factor based on load or based on processing time?
# as long as we use OSDs with the same bandwidth, there is no difference.
reassignment_limit = self.get_average_processing_time() * rebalance_factor
# for each OSD, remove the smallest folder until its total_folder_size does not exceed the reassignment_limit # for each OSD, remove the smallest folder until its total_folder_size does not exceed the reassignment_limit
# unrolling # unrolling
for osd in self.OSDs.values(): for osd in self.OSDs.values():
while osd.total_folder_size > reassignment_factor * osd_information[osd.uuid][capacity]: # TODO how to calculate the 'unrolling limit' for each individual OSD?
# again, as long as all OSDs have the same bandwidth, there is no difference.
# BUT it should definitely not depend on the load or pt of the OSD.
# so for now we use a static limit, the same for all OSDs.
while osd.get_processing_time() > reassignment_limit:
folder_id, folder_size = osd.get_smallest_folder() folder_id, folder_size = osd.get_smallest_folder()
folders_to_be_reassigned.append(folder.Folder(folder_id, folder_size, None)) folders_to_be_reassigned.append(folder.Folder(folder_id, folder_size, None))
movements[folder_id] = osd.uuid movements[folder_id] = osd.uuid
osd.remove_folder(folder_id) osd.remove_folder(folder_id)
# reassignment # reassignment
new_assignments = self.add_folders(folders_to_be_reassigned, new_assignments = self.add_folders(folders_to_be_reassigned)
osd_information=osd_information, ratio_parameter=capacity)
for folder_id, target in new_assignments: for folder_id, target in new_assignments:
movements[folder_id] = (movements[folder_id], target) movements[folder_id] = (movements[folder_id], target)
return movements return movements
def rebalance_one_folder(self, osd_information=None, capacity=''): def rebalance_one_folder(self):
""" """
rebalance folders to OSDs by assigning folders to new OSDs using the following strategy: rebalance folders to OSDs by assigning folders to new OSDs using the following strategy:
1. find OSD with the highest load 1. find OSD with the highest processing time
2. get folder with smallest size on this OSD 2. get folder with smallest size on this OSD
3. find new OSD for this folder using get_lpt_osd 3. find new OSD for this folder using get_lpt_osd
4. if the load on the new OSD is lower than on the original OSD, move the folder to the new OSD. 4. if the processing time on the new OSD is lower than on the original OSD,
otherwise, return. move the folder to the new OSD. otherwise, return.
one open question is whether getting the folder with smallest size in step 2 is a clever choice one open question is whether getting the folder with smallest size in step 2 is a clever choice
(in principle, all folders of the OSD with the highest load are eligible). (in principle, all folders of the OSD with the highest load are eligible).
...@@ -283,16 +290,11 @@ class DataDistribution(object): ...@@ -283,16 +290,11 @@ class DataDistribution(object):
but it might be possible to show that if there is no improvement step of the type that we check for, but it might be possible to show that if there is no improvement step of the type that we check for,
there is no improvement step at all. there is no improvement step at all.
""" """
if osd_information is None:
capacity_key = 'capacity'
osd_information = self.get_equal_sized_fake_osd_information(capacity_key)
capacity = capacity_key
movements = {} movements = {}
while True: while True:
# find OSD with the highest load (origin) # find OSD with the highest processing time (origin)
origin_osd, maximum_load = self.get_maximum_osd_load(osd_information, capacity) origin_osd, maximum_processing_time = self.get_maximum_processing_time()
# pick a folder of this OSD # pick a folder of this OSD
# there are several ways to pick a folder (like largest, smallest, constrained by the resulting load of the # there are several ways to pick a folder (like largest, smallest, constrained by the resulting load of the
...@@ -302,9 +304,9 @@ class DataDistribution(object): ...@@ -302,9 +304,9 @@ class DataDistribution(object):
# find other OSD best suited for the picked folder (target) # find other OSD best suited for the picked folder (target)
# check whether moving folder from origin to target decreases the maximum load of all OSDs (makespan). # check whether moving folder from origin to target decreases the maximum load of all OSDs (makespan).
best_osd, best_osd_load = self.get_lpt_osd(osd_information, capacity, smallest_folder_size) best_osd, best_osd_processing_time = self.get_lpt_osd(smallest_folder_size)
if best_osd_load < maximum_load: if best_osd_processing_time < maximum_processing_time:
self.assign_new_osd(smallest_folder_id, best_osd.uuid) self.assign_new_osd(smallest_folder_id, best_osd.uuid)
movements[smallest_folder_id] = (origin_osd.uuid, best_osd.uuid) movements[smallest_folder_id] = (origin_osd.uuid, best_osd.uuid)
else: else:
...@@ -312,28 +314,19 @@ class DataDistribution(object): ...@@ -312,28 +314,19 @@ class DataDistribution(object):