Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ Netconan can anonymize *many types of sensitive information*:
* IPv4 and IPv6 addresses (``--anonymize-ips``, ``-a``).
* User-specified sensitive words (``--sensitive-words``, ``-w``). *Note that any occurrence of a specified sensitive word will be replaced regardless of context, even if it is part of a larger string.*
* User-specified AS numbers (``--as-numbers``, ``-n``). *Note that any number matching a specified AS number will be anonymized.*
* Description fields (``--anonymize-descriptions``). *Description text is replaced with a deterministic, salt-derived identifier (the same salt and text always produce the same replacement), preserving surrounding quotes and semicolons.*


Netconan attempts to *preserve useful structure*. For example,
Expand Down
25 changes: 25 additions & 0 deletions netconan/anonymize_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@
import logging
import os
import random
import re
import string
import sys
from collections.abc import Sequence
from typing import IO

from .default_reserved_words import default_reserved_words
from .description_anonymization import (
generate_description_regexes,
replace_descriptions,
)
from .ip_anonymization import IpAnonymizer, IpV6Anonymizer, anonymize_ip_addr
from .sensitive_item_removal import (
AsNumberAnonymizer,
Expand Down Expand Up @@ -54,6 +59,7 @@ def __init__(
preserve_networks: Sequence[str] | None = None,
preserve_suffix_v4: int | None = None,
preserve_suffix_v6: int | None = None,
anon_descriptions: bool = False,
) -> None:
"""Creates anonymizer classes."""
self.undo_ip_anon = undo_ip_anon
Expand All @@ -64,6 +70,8 @@ def __init__(
self.anonymizer_sensitive_word: SensitiveWordAnonymizer | None = None
self.compiled_regexes: list[list[CompiledRegexRule]] | None = None
self.pwd_lookup: dict[str, str] | None = None
self.description_regexes: list[re.Pattern[str]] | None = None
self.description_lookup: dict[str, str] | None = None

# The salt is only used for IP and sensitive word anonymization
if salt is None:
Expand All @@ -74,6 +82,10 @@ def __init__(
self.salt: str = salt
logging.debug('Using salt: "%s"', self.salt)

if anon_descriptions:
self.description_regexes = generate_description_regexes()
self.description_lookup = {}

if anon_pwd:
self.compiled_regexes = generate_default_sensitive_item_regexes()
self.pwd_lookup = {}
Expand Down Expand Up @@ -122,6 +134,17 @@ def anonymize_io(self, in_io: IO[str], out_io: IO[str]) -> None:
if self.anonymizer_sensitive_word is not None:
output_line = self.anonymizer_sensitive_word.anonymize(output_line)

if (
self.description_regexes is not None
and self.description_lookup is not None
):
output_line = replace_descriptions(
self.description_regexes,
output_line,
self.description_lookup,
self.salt,
)

if self.anonymizer_as_num is not None:
output_line = anonymize_as_numbers(self.anonymizer_as_num, output_line)

Expand All @@ -146,6 +169,7 @@ def anonymize_files(
preserve_networks: Sequence[str] | None = None,
preserve_suffix_v4: int | None = None,
preserve_suffix_v6: int | None = None,
anon_descriptions: bool = False,
) -> None:
"""Anonymize each file in input and save to output."""
use_stdin = input_path == "-"
Expand Down Expand Up @@ -196,6 +220,7 @@ def anonymize_files(
salt=salt,
sensitive_words=sensitive_words,
undo_ip_anon=undo_ip_anon,
anon_descriptions=anon_descriptions,
)

for in_path, out_path in file_list:
Expand Down
65 changes: 65 additions & 0 deletions netconan/description_anonymization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Anonymize description fields in network configuration files."""

import hashlib
import re

# Matches: description "some text here"
# Groups: pre = keyword, whitespace and opening quote; desc = the quoted text;
# post = the closing quote.
_DESCRIPTION_QUOTED_REGEX = re.compile(
    r'(?P<pre>description\s+")(?P<desc>[^"]+)(?P<post>")'
)

# Matches: description some text here (with optional trailing semicolon)
# Groups: pre = keyword and whitespace; desc = the description text, which may
# be a single character; post = everything after the text up to the end of the
# line (whitespace, optional semicolon, trailing whitespace/newline).
# Every matched character belongs to a named group, so rebuilding a line from
# pre + replacement + post never drops whitespace or the line terminator.
_DESCRIPTION_UNQUOTED_REGEX = re.compile(
    r"(?P<pre>description\s+)(?P<desc>[^\";\s].*?)(?P<post>\s*;?\s*)$"
)


def anonymize_description(value: str, lookup: dict[str, str], salt: str) -> str:
    """Map a description value to its stable ``descr_``-prefixed replacement.

    Values already present in ``lookup`` are answered from it. Otherwise the
    salt followed by the value is UTF-8 encoded and hashed with SHA-256, the
    first five digest bytes are base32-encoded and lowercased, and the
    resulting identifier is stored in ``lookup`` before being returned.
    """
    if value not in lookup:
        salted = "".join((salt, value)).encode("utf-8")
        # Five bytes are 40 bits, i.e. exactly eight base32 characters.
        leading_bytes = hashlib.sha256(salted).digest()[:5]
        lookup[value] = "descr_" + _base32_encode(leading_bytes).lower()
    return lookup[value]


def _base32_encode(data: bytes) -> str:
    """Return the base32 text for ``data`` with trailing ``=`` padding removed."""
    from base64 import b32encode

    encoded = b32encode(data)
    text = encoded.decode("ascii")
    return text.rstrip("=")


def generate_description_regexes() -> list[re.Pattern[str]]:
    """Build the list of compiled description patterns.

    The quoted pattern is placed before the unquoted one, so a caller that
    stops at the first matching pattern tries the quoted form first.
    """
    patterns: list[re.Pattern[str]] = []
    patterns.append(_DESCRIPTION_QUOTED_REGEX)
    patterns.append(_DESCRIPTION_UNQUOTED_REGEX)
    return patterns


def replace_descriptions(
    regexes: list[re.Pattern[str]], line: str, lookup: dict[str, str], salt: str
) -> str:
    """Replace the description text in ``line`` using the first matching regex.

    Args:
        regexes: Compiled patterns tried in order; each must define a named
            group ``desc`` that captures the description text.
        line: The input line, including any trailing newline.
        lookup: Cache of original description -> replacement, updated in place
            by ``anonymize_description``.
        salt: Salt passed through to ``anonymize_description``.

    Returns:
        The line with only the ``desc`` span swapped for its anonymized value,
        or the line unchanged when no pattern matches. First match wins.
    """
    for regex in regexes:
        match = regex.search(line)
        if match is None:
            continue
        anon_value = anonymize_description(match.group("desc"), lookup, salt)
        # Splice at the ``desc`` span rather than rebuilding the line from the
        # other groups: text the pattern matched outside a named group (for
        # example whitespace before a semicolon, or a trailing newline) is then
        # kept verbatim instead of being silently dropped.
        desc_start, desc_end = match.span("desc")
        return line[:desc_start] + anon_value + line[desc_end:]
    return line
8 changes: 8 additions & 0 deletions netconan/netconan.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
default=8,
help="Preserve the trailing bits of IP addresses, aka the host bits of a network. Set this value large enough to represent the largest interface network (e.g., 8 for a /24 or 12 for a /20) or NAT pool.",
)
parser.add_argument(
"--anonymize-descriptions",
action="store_true",
default=False,
help="Anonymize description fields with deterministic hashed replacements",
)
result: argparse.Namespace = parser.parse_args(argv)
return result

Expand Down Expand Up @@ -220,6 +226,7 @@ def main(argv: list[str] = sys.argv[1:]) -> None:
args.anonymize_passwords,
args.anonymize_ips,
args.undo,
args.anonymize_descriptions,
]
):
logging.warning(
Expand All @@ -239,6 +246,7 @@ def main(argv: list[str] = sys.argv[1:]) -> None:
reserved_words,
preserve_prefixes,
preserve_addresses,
anon_descriptions=args.anonymize_descriptions,
preserve_suffix_v4=args.preserve_host_bits,
preserve_suffix_v6=args.preserve_host_bits,
)
Expand Down
70 changes: 70 additions & 0 deletions tests/end_to_end/test_e2e_descriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""End-to-end tests for description anonymization."""

from netconan.netconan import main

# Sample configuration shared by the tests in this module: one interface with
# a double-quoted description and one with an unquoted description that ends
# in a semicolon.
INPUT_CONTENTS = """\
interface GigabitEthernet0/0
description "uplink to core-router1 (port 14)"
ip address 10.0.0.1 255.255.255.0
!
interface GigabitEthernet0/1
description link-to-provider;
ip address 10.0.0.2 255.255.255.0
!
"""


def test_e2e_descriptions(tmpdir):
    """Run netconan with --anonymize-descriptions and inspect the written file."""
    filename = "test.cfg"
    source_dir = tmpdir.mkdir("input")
    source_dir.join(filename).write(INPUT_CONTENTS)
    target_dir = tmpdir.mkdir("output")

    main(
        [
            "-i",
            str(source_dir),
            "-o",
            str(target_dir),
            "-s",
            "E2ESALT",
            "--anonymize-descriptions",
        ]
    )

    result_path = str(target_dir.join(filename))
    with open(result_path) as handle:
        output = handle.read()

    # The original description text must be gone, replaced by hashed values.
    for secret in ("uplink to core-router1 (port 14)", "link-to-provider"):
        assert secret not in output
    assert "descr_" in output

    # Lines that are not descriptions must pass through untouched.
    for kept in (
        "interface GigabitEthernet0/0",
        "ip address 10.0.0.1 255.255.255.0",
        "interface GigabitEthernet0/1",
    ):
        assert kept in output


def test_e2e_descriptions_deterministic(tmpdir):
    """Two runs over identical input with the same salt must write identical output."""
    filename = "test.cfg"

    # Prepare two independent input/output directory pairs with the same input.
    runs = []
    for in_name, out_name in (("input1", "output1"), ("input2", "output2")):
        in_dir = tmpdir.mkdir(in_name)
        in_dir.join(filename).write(INPUT_CONTENTS)
        runs.append((in_dir, tmpdir.mkdir(out_name)))

    common_args = ["-s", "DETSALT", "--anonymize-descriptions"]
    for in_dir, out_dir in runs:
        main(common_args + ["-i", str(in_dir), "-o", str(out_dir)])

    results = []
    for _, out_dir in runs:
        with open(str(out_dir.join(filename))) as handle:
            results.append(handle.read())

    first, second = results
    assert first == second
Loading
Loading