Skip to content

Commit

Permalink
added BSI 2023 / fixed a bug in BSI 2022 (ignoring anforderungsrollen…
Browse files Browse the repository at this point in the history
… because wrong/new order of type/roles in name)
  • Loading branch information
gockelhahn committed Feb 14, 2023
1 parent 2c03ca7 commit daf3630
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 34 deletions.
Empty file removed data/.gitkeep
Empty file.
51 changes: 29 additions & 22 deletions tools/download_and_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from jsonschema import validate

from lib.common import get_from_json, save_json
from lib.BSI import BSI, BSI2020, BSI2022
from lib.BSI import BSI, BSIFactory


# return ID if data (based on a key) is found in json file,
Expand Down Expand Up @@ -101,8 +101,30 @@ def create(bsi: BSI) -> None:
bsielements[kat][str(bauv)]['anforderungen']):
anf_label = bsielements[kat][str(bauv)][
'anforderungen'][str(anfv)]['label']

# examine label + type + responsible (role)
found = re.search(r'(.*)[\s]\((.*)\)', anf_label)
# FIXME: in BSI 2023, different order of type / responsibility
anf_real_label = anf_label
# set Bausteinverantwortlichen in Anforderung by default
anf_rollen_ids = [bau_rolle_id]

# search for responsibility
ff = re.search(r'(.*)\s\[(.*)](.*)', anf_label)
if ff:
# rebuild string without matching part
anf_real_label = '{}{}'.format(ff.groups()[0],
ff.groups()[2])
anf_rollen = ff.groups()[1]
# reset rollen
anf_rollen_ids = []
# negative lookahead, split on ',' but not inside brackets
for entry in re.split(r',(?![^(]*\))', anf_rollen):
rolle_data = {'name': entry.strip()}
rolle_id = get_or_create(
j_rolle, d_rolle, 'name', rolle_data)
anf_rollen_ids.append(rolle_id)

found = re.search(r'(.*)\s\(([BSH])\)', anf_real_label)
if found:
anf_real_label = found.groups()[0]
anf_typ = found.groups()[1]
Expand All @@ -117,23 +139,6 @@ def create(bsi: BSI) -> None:
elif 'H' == anf_typ.upper():
anf_typ = 'Hoch'

# set Bausteinverantwortlichen in Anforderung by default
anf_rollen_ids = [bau_rolle_id]

# search again for responsibility
ff = re.search(r'.*\[(.*)]', anf_real_label)
if ff:
anf_real_label = anf_real_label.split(' [')[0]
anf_rollen = ff.groups()[0]
# reset rollen
anf_rollen_ids = []
# negative lookahead, split on ',' but not inside brackets
for entry in re.split(r',(?![^(]*\))', anf_rollen):
rolle_data = {'name': entry.strip()}
rolle_id = get_or_create(
j_rolle, d_rolle, 'name', rolle_data)
anf_rollen_ids.append(rolle_id)

d_anforderung.append({
'id': len(d_anforderung),
'name': bsielements[kat][str(bauv)][
Expand Down Expand Up @@ -198,12 +203,14 @@ def create(bsi: BSI) -> None:


def main() -> None:
    """Run ``create`` once for every supported BSI edition, oldest first."""
    # Instances are obtained from BSIFactory only. The direct constructor
    # calls that preceded each lookup were overwritten right away, so every
    # edition object was built twice and the first one thrown away.
    for year in (2020, 2021, 2022, 2023):
        create(BSIFactory.get_bsi_version(year))


if __name__ == '__main__':
Expand Down
89 changes: 77 additions & 12 deletions tools/lib/BSI.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
)


class BSIFactory(object):
    """Factory that maps an edition year to its BSI implementation."""

    @staticmethod
    def get_bsi_version(year: int) -> 'BSI':
        """Return a new BSI instance for the requested edition year.

        :param year: edition year (2020, 2021, 2022 or 2023)
        :return: freshly constructed instance of the matching class
        :raises ValueError: if no implementation exists for ``year``
        """
        # class names are resolved at call time, so classes defined later
        # in this module can be returned here
        if year == 2020:
            return BSI2020()
        if year == 2021:
            # the base class itself carries VERSION '2021'
            return BSI()
        if year == 2022:
            return BSI2022()
        if year == 2023:
            return BSI2023()
        # fail loudly instead of silently handing None to the caller
        raise ValueError('unsupported BSI version: {!r}'.format(year))


class BSI(object):
VERSION = '2021'
BSI_DOMAIN = 'https://www.bsi.bund.de'
Expand Down Expand Up @@ -155,9 +168,10 @@ def get_bausteinkategorien(self) -> dict:
'//div[contains(@class, "l-content-wrapper")]//h2'):
# collect Bausteinkategorie attributes
kat_title = kat_link.text_content().strip()
# skip (needed for BSI 2022!?)
if self.VERSION == '2022' and kat_title == 'Ähnliche Themen':
continue
# skip (needed for BSI 2022/2023)
if self.VERSION in ['2022', '2023']:
if kat_title == 'Ähnliche Themen':
continue
kat_title_list = kat_title.split(': ')
kat_name = kat_title_list[0]
kat_label = kat_title_list[1]
Expand Down Expand Up @@ -200,7 +214,7 @@ def get_bausteine_with_anforderungen(self) -> dict:

# loop through all Baustein PDFs
for path in glob.glob(os.path.join(self.baustein_dir, '*.pdf')):
file_prefix = path.split('.pdf')[0]
file_prefix = os.path.splitext(path)[0]
# table of content
toc_path = '{}s.html'.format(file_prefix)
# content
Expand All @@ -226,6 +240,10 @@ def get_bausteine_with_anforderungen(self) -> dict:
# collect Anforderung attributes
anf_title_split = anf_link.text_content().split()
anf_name = anf_title_split[0]
# FIXME: typo in 2023
if self.VERSION == '2023':
if anf_name == 'OPS.2.3A22':
anf_name = 'OPS.2.3.A22'
anf_number = anf_name.split(
'{}.A'.format(bau_name))[1]
anf_label = ' '.join(anf_title_split[1:])
Expand All @@ -235,10 +253,11 @@ def get_bausteine_with_anforderungen(self) -> dict:
'label': clean_gap(anf_label)}

# fix label BSI2022
if self.VERSION == '2022':
if anf_name == 'INF.12.A16':
anforderungen[anf_number]['label'] = clean_gap(anf_label).replace(
' Haustechnik]', ' [Haustechnik]')
if (self.VERSION == '2022' and
anf_name == 'INF.12.A16'):
anforderungen[anf_number]['label'] = clean_gap(
anf_label).replace(' Haustechnik]',
' [Haustechnik]')

# get responsible person
# yes we need the NBSP character here
Expand All @@ -259,10 +278,15 @@ def get_bausteine_with_anforderungen(self) -> dict:
'/following::p/text()')[0].strip()

# fix rolle in BSI 2022/2023
if self.VERSION == '2022':
if self.VERSION in ['2022', '2023']:
if rolle == 'OT-Betrieb':
rolle = 'OT-Betrieb (Operational Technology, OT)'

# fix rolle in BSI 2023
if self.VERSION in ['2023']:
if rolle == 'Informationssicherheitsbeauftragte':
rolle = 'Informationssicherheitsbeauftragte (ISB)'

if bau_cat not in self.baustein:
self.baustein[bau_cat] = {}

Expand All @@ -278,7 +302,7 @@ def get_gefaehrdungen_by_anforderung(self, anf_name: str) -> dict:
bau_name = anf_name.split('.A')[0]
sheet_name = bau_name
# fix errors within KRT, overlooked by BSI (until 08.03.2022)
if self.VERSION != '2022':
if self.VERSION not in ['2022', '2023']:
if bau_name == 'INF.2':
sheet_name = 'INF.2_'
if anf_name == 'ORP.1.A9':
Expand All @@ -298,6 +322,12 @@ def get_gefaehrdungen_by_anforderung(self, anf_name: str) -> dict:
# fix errors within KRT, overlooked by BSI
for i, value in enumerate(all_gefaehrdungen):
newvalue = value
# FIXME: error in BSI 2023
if self.VERSION in ['2023']:
if value == 'G 0.0':
newvalue = 'G 0.3'
if value == 'G.0.14':
newvalue = 'G 0.14'
newvalue = newvalue.replace('G0', 'G 0')
newvalue = newvalue.replace('G.0', 'G 0.')
newvalue = newvalue.replace('G 0.0', 'G 0.')
Expand Down Expand Up @@ -326,13 +356,13 @@ class BSI2022(BSI):
'/DE/Themen/Unternehmen-und-Organisationen'
'/Standards-und-Zertifizierung/IT-Grundschutz'
'/IT-Grundschutz-Kompendium/IT-Grundschutz-Bausteine'
'/Bausteine_Download_Edition_node.html'
'/2022/Bausteine_Download_Edition.html'
)
# Kreuzreferenztabelle (xlsx)
KRT_URL = (
DOWNLOAD_BASE +
'/Kompendium/krt2022_Excel.xlsx'
'?__blob=publicationFile&v=6'
'?__blob=publicationFile&v=7'
)

def __init__(self, tmpdir: Optional[str] = None) -> None:
Expand Down Expand Up @@ -431,3 +461,38 @@ def get_gefaehrdungen_by_anforderung(self, anf_name: str) -> dict:
gefaehrdungen_anf[fixed_gef] = anforderung_values[0][1]

return gefaehrdungen_anf


class BSI2023(BSI):
    """BSI edition 2023: download locations and Baustein PDF folder."""

    VERSION = '2023'
    # re-declared here because a class body cannot see inherited
    # attributes by bare name
    BSI_DOMAIN = 'https://www.bsi.bund.de'
    DOWNLOAD_BASE = ''.join((
        BSI_DOMAIN,
        '/SharedDocs/Downloads/DE/BSI/Grundschutz',
    ))
    # ZIP archive holding one PDF per Baustein
    KOMPENDIUM_URL = ''.join((
        DOWNLOAD_BASE,
        '/IT-GS-Kompendium_Einzel_PDFs_2023/Zip_Datei_Edition_2023.zip',
        '?__blob=publicationFile&v=3',
    ))
    # overview page listing the Bausteinkategorien
    OVERVIEW_URL = ''.join((
        BSI_DOMAIN,
        '/DE/Themen/Unternehmen-und-Organisationen',
        '/Standards-und-Zertifizierung/IT-Grundschutz',
        '/IT-Grundschutz-Kompendium/IT-Grundschutz-Bausteine',
        '/Bausteine_Download_Edition_node.html',
    ))
    # Kreuzreferenztabelle (xlsx)
    KRT_URL = ''.join((
        DOWNLOAD_BASE,
        '/Kompendium/krt2023_Excel.xlsx',
        '?__blob=publicationFile&v=7',
    ))

    def __init__(self, tmpdir: Optional[str] = None) -> None:
        """Initialise via the base class, then narrow the Baustein folder.

        :param tmpdir: passed through unchanged to the base initializer
        """
        super().__init__(tmpdir)

        # the Baustein PDFs are expected in the 'Einzeln_PDF' subfolder
        # of the directory configured by the base class
        parent_dir = self.baustein_dir
        self.baustein_dir = os.path.join(parent_dir, 'Einzeln_PDF')
1 change: 1 addition & 0 deletions tools/lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


def download_binary(url: str, dest: str) -> None:
print('Downloading: {}'.format(url))
with requests.get(url, stream=True) as r:
r.raise_for_status()
with open(dest, 'wb') as f:
Expand Down

0 comments on commit daf3630

Please sign in to comment.