# dirstatuspageparser.py
from html.parser import HTMLParser


# Committed by Felix Seibert.
class DIRStatusPageParser(HTMLParser):
    """Collect key/value records from the nested tables of a status page.

    The parser tracks how deeply ``<table>`` elements are nested and counts
    the tables that open at nesting level two.  Only the second such table
    is evaluated:

    * text found directly at nesting level two is remembered as the
      identifier of the record that follows (stored under ``'uuid'``);
    * text found at nesting level three is read as alternating key and
      value cells.  The key ``"type"`` starts a new record and the value of
      the key ``'last updated'`` completes it.

    Completed records are appended to ``dataSets`` as dictionaries.
    """

    def error(self, message: str) -> None:
        """Deliberately ignore errors reported through this hook.

        NOTE(review): presumably kept for Python versions whose parser base
        class expects subclasses to provide ``error`` -- confirm before
        removing.
        """

    def __init__(self) -> None:
        """Create a parser with empty state; feed it via ``feed()``."""
        super().__init__()

        # Current nesting depth of <table> elements (0 = outside any table).
        self.tableLevel = 0

        # Number of tables opened at nesting level two so far.
        # first table describes mappings, second current service states,
        # third service configurations
        self.currentLevelTwoTable = 0

        # Most recent text seen directly inside the second level-two table;
        # it becomes the 'uuid' of the record that follows.
        self.currentLevelTwoData = None

        # Record under construction and the key still waiting for its value.
        self.currentValues = {}
        self.currentKey = None

        # All completed records, in document order.
        self.dataSets = []

    def handle_starttag(self, tag: str, attrs) -> None:
        """Track table nesting and count tables opened at level two."""
        if tag.lower() != 'table':
            return
        self.tableLevel += 1
        if self.tableLevel == 2:
            self.currentLevelTwoTable += 1

    def handle_endtag(self, tag: str) -> None:
        """Leave one level of table nesting when a ``</table>`` is seen."""
        if tag.lower() != 'table':
            return
        # A stray closing tag must not push the depth below zero, otherwise
        # every following table would be attributed to the wrong level.
        if self.tableLevel > 0:
            self.tableLevel -= 1

    def handle_data(self, data: str) -> None:
        """Consume one text node and update the record under construction.

        Whitespace-only text and any text outside the second level-two
        table are ignored.
        """
        stripped_data = data.strip()
        if not stripped_data or self.currentLevelTwoTable != 2:
            return

        if self.tableLevel == 2:
            # Text directly in the level-two table identifies the record.
            self.currentLevelTwoData = stripped_data
            return
        if self.tableLevel != 3:
            return

        if stripped_data == "type":
            # "type" marks the first key of a new record.
            self.currentKey = None
            self.currentValues = {}

        if self.currentKey is None:
            # No key pending: this cell is a key, the next one is its value.
            self.currentKey = stripped_data
            return

        self.currentValues[self.currentKey] = stripped_data
        if self.currentKey == 'last updated':
            # 'last updated' completes the record; attach its identifier.
            self.currentValues['uuid'] = self.currentLevelTwoData
            self.dataSets.append(self.currentValues)
        self.currentKey = None