from html.parser import HTMLParser
class DIRStatusPageParser(HTMLParser):
def error(self, message):
pass
def __init__(self):
super().__init__()
self.tableLevel = 0
# first table describes mappings, second current service states, third service configurations
self.currentLevelTwoTable = 0
self.currentLevelTwoData = None
self.currentValues = {}
self.currentKey = None
self.dataSets = []
def handle_starttag(self, tag, attrs):
if tag.lower() == 'table':
self.tableLevel += 1
if self.tableLevel == 2:
self.currentLevelTwoTable += 1
def handle_endtag(self, tag):
if tag.lower() == 'table':
self.tableLevel -= 1
def handle_data(self, data):
stripped_data = data.rstrip().lstrip()
if stripped_data.rstrip().lstrip() != '':
if self.tableLevel == 2 and self.currentLevelTwoTable == 2:
self.currentLevelTwoData = stripped_data
if self.tableLevel == 3 and self.currentLevelTwoTable == 2:
if stripped_data == "type":
self.currentKey = None
self.currentValues = {}
if self.currentKey is None:
self.currentKey = stripped_data
else:
self.currentValues[self.currentKey] = stripped_data
if self.currentKey == 'last updated':
self.currentValues['uuid'] = self.currentLevelTwoData
self.dataSets.append(self.currentValues)
self.currentKey = None