Skip to content

Commit

Permalink
obfuscation enhancement
Browse files Browse the repository at this point in the history
  • Loading branch information
simagix committed May 3, 2023
1 parent a144369 commit fedd96b
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 104 deletions.
6 changes: 5 additions & 1 deletion hatchet.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ func Run(fullVersion string) {
if err != nil {
log.Fatal(err)
}
if err = os.WriteFile(*infile+".json", data, 0644); err != nil {
jfile := *infile+".json"
if *infile == "-" {
jfile = "stdin.json"
}
if err = os.WriteFile(jfile, data, 0644); err != nil {
log.Fatal(err)
}
}
Expand Down
2 changes: 0 additions & 2 deletions legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,6 @@ func AddLegacyString(doc *Logv2Info) error {
remote.IP = toks[0]
}
}
} else {
log.Println("key 'mongos' under 'attr.command.$client' not found, report an issue at https://github.com/simagix/hatchet/issues")
}
}
if remote.IP != "" {
Expand Down
5 changes: 2 additions & 3 deletions logv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,8 @@ func (ptr *Logv2) Analyze(logname string) error {

doc := Logv2Info{}
if err = bson.UnmarshalExtJSON([]byte(str), false, &doc); err != nil {
log.Println("clean up database due to error:", err)
dbase.Drop()
return fmt.Errorf("line %d: %v", index, err)
log.Println("line", index, err)
continue
}

if err = AddLegacyString(&doc); err != nil {
Expand Down
162 changes: 105 additions & 57 deletions obfuscation.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,26 @@ import (
var (
cities = []string{
"Atlanta", "Berlin", "Chicago", "Dublin", "ElPaso",
"FortWorth", "Greenville", "Hongkong", "Indianapolis", "Jacksonville",
"Foshan", "Giza", "Hongkong", "Istanbul", "Jakarta",
"London", "Miami", "NewYork", "Orlando", "Paris",
"Queens", "Rome", "Sydney", "Taipei", "Utica",
"Vancouver", "Warsaw", "Xiamen", "Yonkers", "Zurich",
}
flowers = []string{
"Aster", "Begonia", "Carnation", "Daisy", "Echinacea",
"Aster", "Begonia", "Carnation", "Daisy", "Erica",
"Freesia", "Gardenia", "Hyacinth", "Iris", "Jasmine",
"Kalmia", "Lavender", "Marigold", "Narcissus", "Orchid",
"Peony", "Rose", "Sunflower", "Tulip", "Ursinia",
"Violet", "Wisteria", "Xeranthemum", "Yarrow", "Zinnia",
"Violet", "Wisteria", "Xylobium", "Yarrow", "Zinnia",
}
)

type Obfuscation struct {
Coefficient float64 `json:"coefficient"`
CardMap map[string]string `json:"card_map"`
EmailMap map[string]string `json:"email_map"`
HostMap map[string]string `json:"host_map"`
intMap map[int]int
IPMap map[string]string `json:"ip_map"`
NameMap map[string]string `json:"name_map"`
numberMap map[string]float64
PhoneMap map[string]string `json:"phone_map"`
SSNMap map[string]string `json:"ssn_map"`
Expand All @@ -55,37 +54,40 @@ func NewObfuscation() *Obfuscation {
obs.Coefficient = 0.923 - rand.Float64()*0.05
obs.Coefficient = math.Round(obs.Coefficient*1000) / 1000
obs.CardMap = make(map[string]string)
obs.EmailMap = make(map[string]string)
obs.HostMap = make(map[string]string)
obs.intMap = make(map[int]int)
obs.IPMap = make(map[string]string)
obs.NameMap = make(map[string]string)
obs.numberMap = make(map[string]float64)
obs.PhoneMap = make(map[string]string)
obs.SSNMap = make(map[string]string)
return &obs
}

func (ptr *Obfuscation) ObfuscateFile(filename string) error {
var err error
var buf []byte
var isPrefix bool
var reader *bufio.Reader
file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()
if reader, err = gox.NewReader(file); err != nil {
return err
var scanner *bufio.Scanner
if filename == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()
if reader, err = gox.NewReader(file); err != nil {
return err
}
scanner = bufio.NewScanner(reader)
}

for {
if buf, isPrefix, err = reader.ReadLine(); err != nil { // 0x0A separator = newline
break
}
if len(buf) == 0 {
for scanner.Scan() {
str := scanner.Text()
if str == "" {
continue
}
str := string(buf)
for isPrefix {
var bbuf []byte
if bbuf, isPrefix, err = reader.ReadLine(); err != nil {
Expand Down Expand Up @@ -195,30 +197,22 @@ func (ptr *Obfuscation) ObfuscateString(value string) string {
newValue := fmt.Sprintf(":%v", int(float64(ToInt(matched[1:]))*ptr.Coefficient))
value = strings.Replace(value, matched, newValue, -1)
}
if IsCreditCardNo(value) {
if ContainsCreditCardNo(value) {
value = ptr.ObfuscateCreditCardNo(value)
}
if IsFQDN(value) {
value = ptr.ObfuscateFQDN(value)
}
if IsEmail(value) {
value = ptr.ObfuscateEmail(value)
}
if IsIP(value) {
value = ptr.ObfuscateIP(value)
}
if IsSSN(value) {
value = ptr.ObfuscateSSN(value)
}
if IsPhoneNo(value) {
value = ptr.ObfuscatePhoneNo(value)
}
// the following 3, don't change the order
value = ptr.ObfuscateEmail(value)
value = ptr.ObfuscateNS(value)
value = ptr.ObfuscateFQDN(value)
value = ptr.ObfuscateIP(value)
value = ptr.ObfuscateSSN(value)
value = ptr.ObfuscatePhoneNo(value)
return value
}

// ObfuscateCreditCardNo obfuscate digits with '*' except for last 4 digits
func (ptr *Obfuscation) ObfuscateCreditCardNo(cardNo string) string {
if len(cardNo) < 4 {
if !ContainsCreditCardNo(cardNo) {
return cardNo
}
lastFourDigits := cardNo[len(cardNo)-4:]
Expand All @@ -239,19 +233,23 @@ func (ptr *Obfuscation) ObfuscateCreditCardNo(cardNo string) string {

// ObfuscateEmail replace domain name with a city name and user name with a flower name
func (ptr *Obfuscation) ObfuscateEmail(email string) string {
rand.Seed(time.Now().UnixNano())
if !ContainsEmailAddress(email) {
return email
}
city := cities[rand.Intn(len(cities))]
flower := flowers[rand.Intn(len(flowers))]
emailRegex := regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`)
matches := emailRegex.FindStringSubmatch(email)
if len(matches) > 0 {
matched := matches[0]
newValue := ""
if ptr.EmailMap[matched] != "" {
newValue = ptr.EmailMap[matched]
if ptr.NameMap[matched] != "" {
newValue = ptr.NameMap[matched]
} else {
rand.Seed(time.Now().UnixNano())
newValue = strings.ToLower(flower + "@" + city + ".com")
ptr.EmailMap[matched] = newValue
ptr.NameMap[matched] = newValue
ptr.NameMap[newValue] = newValue
}
return strings.Replace(email, matched, newValue, -1)
}
Expand All @@ -260,6 +258,9 @@ func (ptr *Obfuscation) ObfuscateEmail(email string) string {

// ObfuscateIP returns a new IP but keep the first and last octets the same
func (ptr *Obfuscation) ObfuscateIP(ip string) string {
if !ContainsIP(ip) {
return ip
}
ipRegex := regexp.MustCompile(`\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`)
matches := ipRegex.FindStringSubmatch(ip)
if len(matches) > 0 {
Expand All @@ -283,35 +284,79 @@ func (ptr *Obfuscation) ObfuscateIP(ip string) string {
return ip
}

// ObfuscateFQDN returns an obfuscated FQDN
func (ptr *Obfuscation) ObfuscateFQDN(fqdn string) string {
rand.Seed(time.Now().UnixNano())
fqdnRegex := regexp.MustCompile(`([_a-zA-Z0-9]+(-[_a-zA-Z0-9]+)*\.)+[a-zA-Z]{2,}`)
if !ContainsFQDN(fqdn) {
return fqdn
}
fqdnRegex := regexp.MustCompile(`([a-zA-Z0-9-]{1,63}\.)+[a-zA-Z]{2,63}`)
matches := fqdnRegex.FindStringSubmatch(fqdn)
if len(matches) > 0 {
matched := matches[0]
newValue := ""
if ptr.HostMap[matched] != "" {
newValue = ptr.HostMap[matched]
if ptr.NameMap[matched] != "" {
newValue = ptr.NameMap[matched]
} else {
rand.Seed(time.Now().UnixNano())
city := cities[rand.Intn(len(cities))]
flower := flowers[rand.Intn(len(flowers))]
parts := strings.Split(matches[0], ".")
parts := strings.Split(matched, ".")
if len(parts) > 2 {
tail := parts[len(parts)-1]
newValue = strings.ToLower(flower + "." + city + "." + tail)
} else {
newValue = strings.ToLower(city + "." + flower)
}
ptr.HostMap[matched] = newValue
ptr.NameMap[matched] = newValue
ptr.NameMap[newValue] = newValue // so, it won't be replaced again
}
return strings.Replace(fqdn, matched, newValue, -1)
}
return fqdn
}

// ObfuscateSSN returns a random SSN
// ObfuscateNS returns an obfuscated namespace.
//
// The input is returned unchanged when IsNamespace reports false for it, when
// it consists solely of digits and dots, or when no dotted token is found.
// Otherwise the first dotted token (two or three dot-separated segments) is
// replaced everywhere in ns by an alias built from a random city and flower:
// "city.flower" for two segments, "flower.city.<last segment>" for three.
//
// Aliases are remembered in ptr.NameMap so the same token always yields the
// same alias. Each alias is also stored as a key mapping to itself, so passing
// an alias back in returns it unchanged.
func (ptr *Obfuscation) ObfuscateNS(ns string) string {
	if !IsNamespace(ns) {
		return ns
	}
	// Nothing remains once digits and dots are stripped: leave the input alone.
	if nsDigitsAndDotsRegex.ReplaceAllString(ns, "") == "" {
		return ns
	}
	// The pattern requires a literal '.', so an empty result means "no match".
	matched := nsTokenRegex.FindString(ns)
	if matched == "" {
		return ns
	}
	newValue := ptr.NameMap[matched]
	if newValue == "" {
		// Reseeding on every new token mirrors the other Obfuscate* methods.
		rand.Seed(time.Now().UnixNano())
		city := cities[rand.Intn(len(cities))]
		flower := flowers[rand.Intn(len(flowers))]
		parts := strings.Split(matched, ".")
		if len(parts) > 2 {
			// Keep the final segment of a three-part token as-is.
			tail := parts[len(parts)-1]
			newValue = strings.ToLower(flower + "." + city + "." + tail)
		} else {
			newValue = strings.ToLower(city + "." + flower)
		}
		ptr.NameMap[matched] = newValue
		ptr.NameMap[newValue] = newValue
	}
	return strings.ReplaceAll(ns, matched, newValue)
}

// Patterns used by ObfuscateNS, compiled once instead of on every call.
var (
	// nsDigitsAndDotsRegex matches a single digit or dot.
	nsDigitsAndDotsRegex = regexp.MustCompile("[0-9.]")
	// nsTokenRegex matches two dot-separated segments with an optional third.
	// NOTE(review): the doubled '^' in the later character classes also
	// excludes a literal '^', while the first class excludes '$' instead —
	// confirm this asymmetry is intended.
	nsTokenRegex = regexp.MustCompile(`[^@$.\n]*\.[^^@.\n]*([.][^^@.\n]*)?`)
)

// ObfuscateSSN returns an obfuscated SSN
func (ptr *Obfuscation) ObfuscateSSN(ssn string) string {
ssnRegex := regexp.MustCompile(`\d{3}-?\d{2}-?\d{4}`)
if !IsSSN(ssn) {
return ssn
}
ssnRegex := regexp.MustCompile(`\d{3}-\d{2}-\d{4}`)
matches := ssnRegex.FindStringSubmatch(ssn)
if len(matches) > 0 {
matched := matches[0]
Expand All @@ -338,25 +383,28 @@ func (ptr *Obfuscation) ObfuscateSSN(ssn string) string {
}

// ObfuscatePhoneNo returns a random phone number with the same format
func (ptr *Obfuscation) ObfuscatePhoneNo(phone string) string {
if ptr.PhoneMap[phone] != "" {
return ptr.PhoneMap[phone]
func (ptr *Obfuscation) ObfuscatePhoneNo(phoneNo string) string {
if !ContainsPhoneNo(phoneNo) {
return phoneNo
}
if ptr.PhoneMap[phoneNo] != "" {
return ptr.PhoneMap[phoneNo]
}
rand.Seed(time.Now().UnixNano())
obfuscated := make([]byte, len(phone))
obfuscated := make([]byte, len(phoneNo))
n := 0
for i := range obfuscated {
if phone[i] >= '0' && phone[i] <= '9' {
if phoneNo[i] >= '0' && phoneNo[i] <= '9' {
n++
if n > 5 {
obfuscated[i] = byte(rand.Intn(10) + '0')
} else {
obfuscated[i] = phone[i]
obfuscated[i] = phoneNo[i]
}
} else {
obfuscated[i] = phone[i]
obfuscated[i] = phoneNo[i]
}
}
ptr.PhoneMap[phone] = string(obfuscated)
ptr.PhoneMap[phoneNo] = string(obfuscated)
return string(obfuscated)
}
26 changes: 24 additions & 2 deletions obfuscation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func TestObfuscateCreditCardNo(t *testing.T) {
func TestObfuscateEmail(t *testing.T) {
// Initialize the Obfuscation struct
o := &Obfuscation{
EmailMap: make(map[string]string),
NameMap: make(map[string]string),
}

// Test case 1: Obfuscating a valid email address
Expand Down Expand Up @@ -161,7 +161,7 @@ func TestObfuscateIP(t *testing.T) {
func TestObfuscateFQDN(t *testing.T) {
// Initialize the Obfuscation struct
o := &Obfuscation{
HostMap: make(map[string]string),
NameMap: make(map[string]string),
}

// Test case 1: Obfuscating a valid FQDN with 2 parts
Expand All @@ -188,6 +188,28 @@ func TestObfuscateFQDN(t *testing.T) {
}
}

// TestObfuscateNS checks that ObfuscateNS rewrites plain dotted names and
// returns email-style inputs unchanged.
func TestObfuscateNS(t *testing.T) {
	ptr := &Obfuscation{
		NameMap: make(map[string]string),
	}

	// Test case 1: dotted names with two and three segments must come back as
	// a different value that IsNamespace still accepts.
	for _, ns := range []string{"example.com", "mail.example.com"} {
		obfuscated := ptr.ObfuscateNS(ns)
		if obfuscated == ns {
			t.Errorf("ObfuscateNS(%q) returned the input unchanged, expected an obfuscated value", ns)
		}
		if !IsNamespace(obfuscated) {
			t.Errorf("ObfuscateNS(%q) returned %q, which IsNamespace rejects", ns, obfuscated)
		}
	}

	// Test case 2: inputs containing '@' must be returned unchanged.
	for _, ns := range []string{"user@example.com", "user@mail.example.com"} {
		obfuscated := ptr.ObfuscateNS(ns)
		if obfuscated != ns {
			t.Errorf("ObfuscateNS(%q) returned %q, expected %q", ns, obfuscated, ns)
		}
	}
}

func TestObfuscateSSN(t *testing.T) {
// Initialize the Obfuscation struct
o := &Obfuscation{
Expand Down
Loading

0 comments on commit fedd96b

Please sign in to comment.