Skip to content

Commit

Permalink
Markup for missed creds after escape sequence (#177)
Browse files Browse the repository at this point in the history
* 399221f4.java

* 6aae1316

* 6aae1316

* 0f133e09

* upd

* done

* upd

* [skip actions] [esc] 2024-12-07T18:21:50+02:00

* [skip actions] [esc] 2024-12-07T18:26:42+02:00

* other

* reportfix

* 7738e44d

* b6b2487d

* [skip actions] [esc] 2024-12-07T20:30:06+02:00

* [skip actions] [esc] 2024-12-07T20:30:59+02:00

* [skip actions] [esc] 2024-12-07T21:04:13+02:00

* [skip actions] [esc] 2024-12-07T21:07:45+02:00

* [skip actions] [esc] 2024-12-07T21:09:49+02:00

* [skip actions] [esc] 2024-12-07T21:10:46+02:00

* upd

* bitbucket

* Bitbucket

* lineopt

* updBMscor

* --fix

* --fix bool

* [skip actions] [esc] 2024-12-09T09:52:43+02:00

* [skip actions] [esc] 2024-12-09T09:53:05+02:00

* fix

* newline

* Certificate

* extra

* --fix

* rollback

* fix miss

* correction

* correction2

* True

* upd

* rollback

* markup

* fix

* markupfix

* Delete .ci/benchmark.txt.orig
  • Loading branch information
babenek authored Dec 10, 2024
1 parent f144fcb commit ae5385a
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 43 deletions.
28 changes: 14 additions & 14 deletions .ci/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
META MD5 5bb0a05fd77c2761b8414bba41103939
DATA MD5 9e77a2d9f718f175264ab5a386ae86c4
DATA: 16342283 interested lines. MARKUP: 62022 items
META MD5 491a59236c4d6280b46e0285ce2209e4
DATA MD5 65e29f238760e1283df0f9762f9f1459
DATA: 16342283 interested lines. MARKUP: 62023 items
FileType FileNumber ValidLines Positives Negatives Templates
--------------- ------------ ------------ ----------- ----------- -----------
194 28318 71 418 90
Expand Down Expand Up @@ -63,7 +63,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.gd 1 37 1
.gml 3 3075 16
.gni 3 5017 19
.go 1080 566476 687 4131 747
.go 1080 566476 688 4131 747
.golden 5 1168 1 13 29
.gradle 45 3265 4 90 100
.graphql 7 420 13
Expand All @@ -82,11 +82,11 @@ FileType FileNumber ValidLines Positives Negatives Templat
.ipynb 1 134 5
.j 1 241 4
.j2 30 5530 6 186 10
.java 621 134132 368 1365 171
.java 621 134132 370 1365 171
.jenkinsfile 1 58 2 6
.jinja2 1 64 2
.js 659 536413 531 2497 331
.json 851 13046493 1077 10907 140
.js 659 536413 533 2497 331
.json 851 13046493 1079 10907 140
.jsp 13 3202 1 40
.jsx 7 857 19
.jwt 1 1 2
Expand Down Expand Up @@ -153,7 +153,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.pug 2 193 2
.purs 1 69 4
.pxd 1 150 5 2
.py 890 291553 681 3303 726
.py 890 291553 682 3303 726
.pyi 4 1361 9
.pyp 1 167 1
.pyx 2 1094 23
Expand Down Expand Up @@ -206,10 +206,10 @@ FileType FileNumber ValidLines Positives Negatives Templat
.toml 83 2379 53 105 156
.tpl 1 43 1
.travis 1 34 4 3 1
.ts 583 106730 157 1800 203
.ts 583 106730 158 1800 203
.tsx 54 7914 1 114 5
.ttar 1 452 1
.txt 440 78102 5287 6354 49
.txt 440 78102 5289 6354 49
.utf8 1 77 2
.vsixmanifest 1 36 1
.vsmdi 1 6 2
Expand All @@ -222,7 +222,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.yml 419 36169 559 889 376
.zsh 6 872 12
.zsh-theme 1 97 1
TOTAL: 10232 16342283 12261 49692 5101
TOTAL: 10232 16342283 12272 49692 5101
credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ----
Expand All @@ -233,7 +233,7 @@ AWS S3 Bucket 67 23 0
Atlassian Old PAT token 27 308 3 0 0 311 27 0.000000 1.000000 0.920118 0.000000
Auth 417 2739 82 0 0 2821 417 0.000000 1.000000 0.871217 0.000000
Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000
BASE64 Private Key 7 4 0 0 0 4 7 0.000000 1.000000 0.363636 0.000000
BASE64 Private Key 12 4 0 0 0 4 12 0.000000 1.000000 0.250000 0.000000
BASE64 encoded PEM Private Key 7 0 0 0 0 0 7 1.000000 0.000000 0.000000
Bitbucket Client ID 143 2095 9 0 0 2104 143 0.000000 1.000000 0.936360 0.000000
Bitbucket Client Secret 301 807 10 0 0 817 301 0.000000 1.000000 0.730769 0.000000
Expand Down Expand Up @@ -270,5 +270,5 @@ Tencent WeChat API App ID 6 0 0
Token 644 4170 454 0 0 4624 644 0.000000 1.000000 0.877752 0.000000
Twilio Credentials 30 39 0 0 0 39 30 0.000000 1.000000 0.565217 0.000000
URL Credentials 210 157 215 0 0 372 210 0.000000 1.000000 0.639175 0.000000
UUID 1069 265 0 0 0 265 1069 0.000000 1.000000 0.198651 0.000000
12261 49692 5101 0 0 0 49692 12261 0.000000 1.000000 0.802092 0.000000
UUID 1075 265 0 0 0 265 1075 0.000000 1.000000 0.197761 0.000000
12272 49692 5101 0 0 0 49692 12272 0.000000 1.000000 0.801950 0.000000
60 changes: 33 additions & 27 deletions benchmark/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,41 +260,40 @@ def check_line_from_meta(self,
f",{line_start},{line_end}" \
f",F,F,{value_start},{value_end}" \
f",F,F,,,,,0.0,0,F,F,F,{rule}"
lost_meta = MetaRow({
"Id": self.meta_next_id,
"FileID": file_id,
"Domain": "GitHub",
"RepoName": project_id,
"FilePath": data_path,
"LineStart": line_start,
"LineEnd": line_end,
"GroundTruth": 'F',
"WithWords": 'F',
"ValueStart": value_start,
"ValueEnd": value_end,
"InURL": 'F',
"InRuntimeParameter": 'F',
"CharacterSet": '',
"CryptographyKey": '',
"PredefinedPattern": '',
"VariableNameType": '',
"Entropy": 0.0,
"Length": 0,
"Base64Encode": 'F',
"HexEncode": 'F',
"URLEncode": 'F',
"Category": rule
})

if not (rows := self.meta.get(MetaKey(data_path, line_start, line_end))):
self.lost_cnt += 1
self.meta_next_id += 1
print(f"NOT FOUND WITH KEY: {approximate}", flush=True)
if self.fix:
with open(f"{self.cred_data_dir}/meta/{project_id}.csv", "a") as f:
f.write(f"{str(approximate)}\n")
lost_meta = MetaRow({
"Id": self.meta_next_id,
"FileID": file_id,
"Domain": "GitHub",
"RepoName": project_id,
"FilePath": data_path,
"LineStart": line_start,
"LineEnd": line_end,
"GroundTruth": 'F',
"WithWords": 'F',
"ValueStart": value_start,
"ValueEnd": value_end,
"InURL": 'F',
"InRuntimeParameter": 'F',
"CharacterSet": '',
"CryptographyKey": '',
"PredefinedPattern": '',
"VariableNameType": '',
"Entropy": 0.0,
"Length": 0,
"Base64Encode": 'F',
"HexEncode": 'F',
"URLEncode": 'F',
"Category": rule
})
self.meta[MetaKey(data_path, line_start, line_end)] = [lost_meta]

self.meta_next_id += 1
return LineStatus.NOT_IN_DB, project_id, file_id

suggestion = "LOST:"
Expand Down Expand Up @@ -363,10 +362,17 @@ def check_line_from_meta(self,
["sed", "-i",
f"s/{row.Id},\\(.*\\)/{row.Id},\\1:{rule}/",
f"{self.cred_data_dir}/meta/{row.RepoName}.csv"])
self.meta[MetaKey(data_path, line_start, line_end)].append(lost_meta)
lost_meta = None

# meta has no markup for given credential
self.lost_cnt += 1
print(f"{suggestion} {approximate}", flush=True)
self.meta_next_id += 1
if lost_meta and self.fix:
with open(f"{self.cred_data_dir}/meta/{project_id}.csv", "a") as f:
f.write(f"{str(approximate)}\n")
self.meta[MetaKey(data_path, line_start, line_end)].append(lost_meta)
return LineStatus.NOT_IN_DB, project_id, file_id

def analyze_result(self) -> None:
Expand Down
3 changes: 3 additions & 0 deletions meta/0f133e09.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1770,3 +1770,6 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
1351521,c7b13616,GitHub,0f133e09,data/0f133e09/test/c7b13616.txt,9,9,T,F,1250,1302,F,F,,,,,0.0,0,F,F,F,AWS S3 Bucket
1351522,c7b13616,GitHub,0f133e09,data/0f133e09/test/c7b13616.txt,9,9,T,F,1584,1613,F,F,,,,,0.0,0,F,F,F,AWS S3 Bucket
1479613,03c77d4f,GitHub,0f133e09,data/0f133e09/src/03c77d4f.py,314,314,F,F,49,54,F,F,,,,,0.0,0,F,F,F,Auth
1480549,03cdc0c5,GitHub,0f133e09,data/0f133e09/test/03cdc0c5.py,95,95,T,F,397,433,F,F,,,,,0.0,0,F,F,F,UUID
1480550,c65fed08,GitHub,0f133e09,data/0f133e09/test/c65fed08.txt,9,9,T,F,668,704,F,F,,,,,0.0,0,F,F,F,UUID
1480551,c65fed08,GitHub,0f133e09,data/0f133e09/test/c65fed08.txt,10,10,T,F,384,420,F,F,,,,,0.0,0,F,F,F,UUID
1 change: 1 addition & 0 deletions meta/49e2a965.csv
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
108549,ae95a565,GitHub,49e2a965,data/49e2a965/other/ae95a565.md,32,32,F,F,-1,-1,F,F,,,,,0.0,-1,F,F,F,Other
114654,ae95a565,GitHub,49e2a965,data/49e2a965/other/ae95a565.md,195,195,F,F,,,F,F,,,,,0.00,,F,F,F,Auth
131632,ae95a565,GitHub,49e2a965,data/49e2a965/other/ae95a565.md,72,72,F,F,,,F,F,,,,,0.00,,F,F,F,Password
1480555,76ccf172,GitHub,49e2a965,data/49e2a965/test/76ccf172.ts,74,74,T,F,46,,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key
1 change: 1 addition & 0 deletions meta/5cecf769.csv
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
133375,3184cf27,GitHub,5cecf769,data/5cecf769/src/3184cf27.cc,518,518,F,F,,,F,F,,,,,0,0,F,F,F,API
133376,c8e8fa46,GitHub,5cecf769,data/5cecf769/src/c8e8fa46.bzl,19,19,F,F,,,F,F,,,,,0,0,F,F,F,API
133377,c8e8fa46,GitHub,5cecf769,data/5cecf769/src/c8e8fa46.bzl,78,78,F,F,,,F,F,,,,,0,0,F,F,F,API
1480556,724f0a84,GitHub,5cecf769,data/5cecf769/test/724f0a84.json,5,5,T,F,51,,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key
1 change: 1 addition & 0 deletions meta/81cd05d0.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5252,3 +5252,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
1341487,f02052a6,GitHub,81cd05d0,data/81cd05d0/src/f02052a6.json,3761,3761,T,F,1148,1184,F,F,,,,,0.0,0,F,F,F,UUID:Token
1479614,334b19eb,GitHub,81cd05d0,data/81cd05d0/src/334b19eb.json,32577,32577,T,F,28,,F,F,,,,,0.0,0,F,F,F,Secret
1479615,334b19eb,GitHub,81cd05d0,data/81cd05d0/src/334b19eb.json,32790,32790,T,F,28,,F,F,,,,,0.0,0,F,F,F,Secret
1480509,357f73fe,GitHub,81cd05d0,data/81cd05d0/src/357f73fe.json,108443,108443,T,F,247,283,F,F,,,,,0.0,0,F,F,F,UUID
1 change: 1 addition & 0 deletions meta/8ba59c91.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1019,3 +1019,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
133847,73c8b62a,GitHub,8ba59c91,data/8ba59c91/src/73c8b62a.json,241,241,F,F,,,F,F,,,,,0,0,F,F,F,Auth
133848,ad18488c,GitHub,8ba59c91,data/8ba59c91/src/ad18488c.json,241,241,F,F,,,F,F,,,,,0,0,F,F,F,Auth
133849,ec71fabd,GitHub,8ba59c91,data/8ba59c91/src/ec71fabd.json,241,241,F,F,,,F,F,,,,,0,0,F,F,F,Auth
1480572,f7e33e13,GitHub,8ba59c91,data/8ba59c91/src/f7e33e13.js,19,19,T,F,55,,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key
1 change: 1 addition & 0 deletions meta/e0b41e26.csv
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
138124,2a34abd7,GitHub,e0b41e26,data/e0b41e26/test/2a34abd7.ndjson,4,4,F,F,102,122,F,F,,,,,0.0,0,F,F,F,Token
138125,2a34abd7,GitHub,e0b41e26,data/e0b41e26/test/2a34abd7.ndjson,6,6,F,F,104,142,F,F,,,,,0.0,0,F,F,F,Token
1013650,508b8489,GitHub,e0b41e26,data/e0b41e26/test/508b8489.js,189,189,Template,F,34,48,F,F,,,,,0.0,0,F,F,F,Token
1480573,c72880b5,GitHub,e0b41e26,data/e0b41e26/test/c72880b5.js,332,332,T,F,43,,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key
4 changes: 2 additions & 2 deletions meta/ec138349.csv
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,6 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
1338530,18b43943,GitHub,ec138349,data/ec138349/test/18b43943.java,47,47,F,F,6,101,F,F,,,,,0.0,0,F,F,F,JSON Web Token
1338573,2f9b15a9,GitHub,ec138349,data/ec138349/test/2f9b15a9.java,125,125,Template,F,58,65,F,F,,,,,0.0,0,F,F,F,Auth:Token
1338575,2f9b15a9,GitHub,ec138349,data/ec138349/test/2f9b15a9.java,158,158,F,F,58,68,F,F,,,,,0.0,0,F,F,F,Auth:Token
1480452,399221f4,GitHub,ec138349,data/ec138349/test/399221f4.java,52,52,T,F,135,171,F,F,,,,,0.0,0,F,F,F,Auth:Nonce
1480452,399221f4,GitHub,ec138349,data/ec138349/test/399221f4.java,52,52,T,F,135,171,F,F,,,,,0.0,0,F,F,F,Auth:Nonce:UUID
1480456,399221f4,GitHub,ec138349,data/ec138349/test/399221f4.java,64,64,T,F,148,167,F,F,,,,,0.0,0,F,F,F,Auth:Nonce
1480457,399221f4,GitHub,ec138349,data/ec138349/test/399221f4.java,52,52,T,F,256,292,F,F,,,,,0.0,0,F,F,F,Auth:Token
1480457,399221f4,GitHub,ec138349,data/ec138349/test/399221f4.java,52,52,T,F,256,292,F,F,,,,,0.0,0,F,F,F,Auth:Token:UUID
1 change: 1 addition & 0 deletions meta/f008dd40.csv
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,4 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value
134727,a40d8da9,GitHub,f008dd40,data/f008dd40/test/a40d8da9.go,274,274,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
134728,c592e77b,GitHub,f008dd40,data/f008dd40/test/c592e77b.go,226,226,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key
1036094,86db8848,GitHub,f008dd40,data/f008dd40/src/86db8848.yml,12,12,T,F,130,138,F,F,Any,,,Secret,4.94,68,F,F,F,Password
1480574,813dc2b8,GitHub,f008dd40,data/f008dd40/test/813dc2b8.go,11,11,T,F,64,,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key

0 comments on commit ae5385a

Please sign in to comment.