kubernetes · mariantalla · Sep 7, 2018 · Sep 7, 2018 · Sep 12, 2018 · Sep 11, 2018
diff --git a/.boilerplate.json b/.boilerplate.json
@@ -0,0 +1,11 @@
+{
+  "dirs_to_skip" : [
+    "vendor",
+    "verify/boilerplate/testdata/"
+  ],
+  "not_generated_files_to_skip" : [
+    "verify/boilerplate/boilerplate.py",
+    "kazel/generator.go",
+    "tools/generate_crosstool/main.go"
+  ]
+}
diff --git a/.travis.yml b/.travis.yml
@@ -22,6 +22,7 @@ install:
 script:
   # Build first since we need the generated protobuf for the govet checks
   - bazel build --config=ci //...
+  - cd verify/boilerplate && python -m unittest boilerplate_test ; cd -
   - ./verify/verify-boilerplate.sh --rootdir="$(pwd)" -v
   - GOPATH="${GOPATH}:$(pwd)/bazel-bin/verify/verify-go-src-go_path" ./verify/verify-go-src.sh --rootdir "$(pwd)" -v
   - ./verify/verify-bazel.sh

diff --git a/verify/README.md b/verify/README.md
@@ -33,9 +33,45 @@ script:
 # - vendor/github.com/kubernetes/repo-infra/verify-go-src.sh --rootdir=$(pwd) -v
 ```
 
-## Verify boilerplate
+## Verify & Ensure boilerplate
+
+- `verify-boilerplate.sh`:  
+   Verifies that the boilerplate for various formats (go files, Makefile, etc.)
+   is included in each file.
+- `ensure-boilerplate.sh`:  
+   Ensure that various formats (see above) have the boilerplate included.
+
+The scripts assume the root of the repo to be two levels up of the directory
+the scripts are in.
+
+If this is not the case, you can configure the root of the reop by either
+setting `REPO_ROOT` or by calling the scripts with `--root-dir=<root>`.
+
+You can put a config file into the root of your repo named `boilerplate.json`.
+The config can look something like this:
+```json
+{
+  "dirs_to_skip" : [
+    "vendor",
+    "tools/contrib"
+  ],
+  "not_generated_files_to_skip" : [
+    "some/file",
+    "some/other/file.something"
+  ]
+}
+```
+Currently supported settings are
+- `dirs_to_skip`  
+  A list of directories which is excluded when checking or adding the headers
+- `not_generated_files_to_skip`  
+  A list of all the files contain 'DO NOT EDIT', but are not generated
+
+All other settings will be ignored.
+
+### Tests
 
-Verifies that the boilerplate for various formats (go files, Makefile, etc.) is included in each file: `verify-boilerplate.sh`. 
+To run the test, cd into the boilerplate directory and run `python -m unittest boilerplate_test`.
 
 ## Verify go source code 
 

diff --git a/verify/boilerplate/boilerplate.generatego.txt b/verify/boilerplate/boilerplate.generatego.txt
@@ -0,0 +1,16 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
diff --git a/verify/boilerplate/boilerplate.py b/verify/boilerplate/boilerplate.py
@@ -17,14 +17,13 @@
 from __future__ import print_function
 
 import argparse
+import datetime
 import difflib
 import glob
-import json
-import mmap
 import os
 import re
 import sys
-from datetime import date
+import json
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
@@ -47,10 +46,21 @@
     help="give verbose output regarding why a file does not pass",
     action="store_true")
 
+parser.add_argument(
+    "--ensure",
+    help="ensure all files which should have appropriate licence headers have them prepended",
+    action="store_true")
+
 args = parser.parse_args()
 
 verbose_out = sys.stderr if args.verbose else open("/dev/null", "w")
 
+default_skipped_dirs = ['Godeps', '.git', 'vendor']
+
+# list all the files that contain 'DO NOT EDIT', but are not generated
+default_skipped_not_generated = []
+
+
 def get_refs():
     refs = {}
 
@@ -64,7 +74,38 @@ def get_refs():
 
     return refs
 
-def file_passes(filename, refs, regexs):
+
+def is_generated_file(filename, data, regexs, files_to_skip):
+    for d in files_to_skip:
+        if d in filename:
+            return False
+
+    p = regexs["generated"]
+    return p.search(data)
+
+
+def match_and_delete(content, re):
+    match = re.search(content)
+    if match is None:
+        return content, None
+    return re.sub("", content, 1), match.group()
+
+
+def replace_specials(content, extension, regexs):
+    # remove build tags from the top of Go files
+    if extension == "go" or extension == "generatego":
+        re = regexs["go_build_constraints"]
+        return match_and_delete(content, re)
+
+    # remove shebang from the top of shell files
+    if extension == "sh":
+        re = regexs["shebang"]
+        return match_and_delete(content, re)
+
+    return content, None
+
+
+def file_passes(filename, refs, regexs, not_generated_files_to_skip):
     try:
         f = open(filename, 'r')
     except Exception as exc:
@@ -74,22 +115,17 @@ def file_passes(filename, refs, regexs):
     data = f.read()
     f.close()
 
-    basename = os.path.basename(filename)
-    extension = file_extension(filename)
-    if extension != "":
-        ref = refs[extension]
-    else:
-        ref = refs[basename]
+    ref, extension, generated = analyze_file(
+        filename, data, refs, regexs, not_generated_files_to_skip)
 
-    # remove build tags from the top of Go files
-    if extension == "go":
-        p = regexs["go_build_constraints"]
-        (data, found) = p.subn("", data, 1)
+    return file_content_passes(data, filename, ref, extension, generated, regexs)
 
-    # remove shebang from the top of shell files
-    if extension == "sh" or extension == "py":
-        p = regexs["shebang"]
-        (data, found) = p.subn("", data, 1)
+
+def file_content_passes(data, filename, ref, extension, generated, regexs):
+    if ref is None:
+        return True
+
+    data, _ = replace_specials(data, extension, regexs)
 
     data = data.splitlines()
 
@@ -106,15 +142,19 @@ def file_passes(filename, refs, regexs):
     p = regexs["year"]
     for d in data:
         if p.search(d):
-            print('File %s is missing the year' % filename, file=verbose_out)
+            if generated:
+                print('File %s has the YEAR field, but it should not be in generated file' % filename, file=verbose_out)
+            else:
+                print('File %s has the YEAR field, but missing the year of date' % filename, file=verbose_out)
             return False
 
-    # Replace all occurrences of the regex "CURRENT_YEAR|...|2016|2015|2014" with "YEAR"
-    p = regexs["date"]
-    for i, d in enumerate(data):
-        (data[i], found) = p.subn('YEAR', d)
-        if found != 0:
-            break
+    if not generated:
+        # Replace all occurrences of the regex "2014|2015|2016|2017|2018" with "YEAR"
+        p = regexs["date"]
+        for i, d in enumerate(data):
+            (data[i], found) = p.subn('YEAR', d)
+            if found != 0:
+                break
 
     # if we don't match the reference at this point, fail
     if ref != data:
@@ -128,25 +168,34 @@ def file_passes(filename, refs, regexs):
 
     return True
 
+
 def file_extension(filename):
     return os.path.splitext(filename)[1].split(".")[-1].lower()
 
-skipped_dirs = ['Godeps', 'third_party', '_gopath', '_output', '.git', 
-                'cluster/env.sh', 'vendor', 'test/e2e/generated/bindata.go',
-                'repo-infra/verify/boilerplate/test', '.glide']
 
-def normalize_files(files):
+def read_config_file(conf_path):
+    try:
+        with open(conf_path) as json_data_file:
+            return json.load(json_data_file)
+    except ValueError:
+        raise
+    except:
+        return {'dirs_to_skip': default_skipped_dirs, 'not_generated_files_to_skip': default_skipped_not_generated}
+
+
+def normalize_files(files, dirs_to_skip):
     newfiles = []
     for pathname in files:
-        if any(x in pathname for x in skipped_dirs):
+        if any(x in pathname for x in dirs_to_skip):
             continue
         newfiles.append(pathname)
     for i, pathname in enumerate(newfiles):
         if not os.path.isabs(pathname):
             newfiles[i] = os.path.join(args.rootdir, pathname)
     return newfiles
 
-def get_files(extensions):
+
+def get_files(extensions, dirs_to_skip):
     files = []
     if len(args.filenames) > 0:
         files = args.filenames
@@ -156,16 +205,15 @@ def get_files(extensions):
             # as we would prune these later in normalize_files(). But doing it
             # cuts down the amount of filesystem walking we do and cuts down
             # the size of the file list
-            for d in skipped_dirs:
+            for d in dirs_to_skip:
                 if d in dirs:
                     dirs.remove(d)
 
             for name in walkfiles:
                 pathname = os.path.join(root, name)
                 files.append(pathname)
 
-    files = normalize_files(files)
-
+    files = normalize_files(files, dirs_to_skip)
     outfiles = []
     for pathname in files:
         basename = os.path.basename(pathname)
@@ -174,29 +222,101 @@ def get_files(extensions):
             outfiles.append(pathname)
     return outfiles
 
+
+def analyze_file(file_name, file_content, refs, regexs, not_generated_files_to_skip):
+    # determine if the file is automatically generated
+    generated = is_generated_file(
+        file_name, file_content, regexs, not_generated_files_to_skip)
+
+    base_name = os.path.basename(file_name)
+    if generated:
+        extension = "generatego"
+    else:
+        extension = file_extension(file_name)
+
+    if extension != "":
+        ref = refs[extension]
+    else:
+        ref = refs.get(base_name, None)
+
+    return ref, extension, generated
+
+
+def ensure_boilerplate_file(file_name, refs, regexs, not_generated_files_to_skip):
+    with open(file_name, mode='r+') as f:
+        file_content = f.read()
+
+        ref, extension, generated = analyze_file(
+            file_name, file_content, refs, regexs, not_generated_files_to_skip)
+
+        # licence header
+        licence_header = os.linesep.join(ref)
+
+        # content without shebang and such
+        content_without_specials, special_header = replace_specials(
+            file_content, extension, regexs)
+
+        # new content, to be writen to the file
+        new_content = ''
+
+        # shebang and such
+        if special_header is not None:
+            new_content += special_header
+
+        # licence header
+        current_year = str(datetime.datetime.now().year)
+        year_replacer = regexs['year']
+        new_content += year_replacer.sub(current_year, licence_header, 1)
+
+        # actual content
+        new_content += os.linesep + content_without_specials
+
+        f.seek(0)
+        f.write(new_content)
+
+
+def get_dates():
+    years = datetime.datetime.now().year
+    return '(%s)' % '|'.join((str(year) for year in range(2014, years+1)))
+
+
 def get_regexs():
     regexs = {}
     # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
-    regexs["year"] = re.compile( 'YEAR' )
-    # dates can be 2014, 2015, 2016, ..., CURRENT_YEAR, company holder names can be anything
-    years = range(2014, date.today().year + 1)
-    regexs["date"] = re.compile( '(%s)' % "|".join(map(lambda l: str(l), years)) )
+    regexs["year"] = re.compile('YEAR')
+    # get_dates return 2014, 2015, 2016, 2017, or 2018 until the current year as a regex like: "(2014|2015|2016|2017|2018)";
+    # company holder names can be anything
+    regexs["date"] = re.compile(get_dates())
     # strip // +build \n\n build constraints
-    regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
+    regexs["go_build_constraints"] = re.compile(
+        r"^(// \+build.*\n)+\n", re.MULTILINE)
     # strip #!.* from shell scripts
     regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
+    # Search for generated files
+    regexs["generated"] = re.compile('DO NOT EDIT')
     return regexs
 
+
 def main():
+    config_file_path = os.path.join(args.rootdir, ".boilerplate.json")
+    config = read_config_file(config_file_path)
+
     regexs = get_regexs()
     refs = get_refs()
-    filenames = get_files(refs.keys())
+    filenames = get_files(refs.keys(), config.get('dirs_to_skip'))
+    not_generated_files_to_skip = config.get('not_generated_files_to_skip', [])
 
     for filename in filenames:
-        if not file_passes(filename, refs, regexs):
-            print(filename, file=sys.stdout)
+        if not file_passes(filename, refs, regexs, not_generated_files_to_skip):
+            if args.ensure:
+                print("adding boilerplate header to %s" % filename)
+                ensure_boilerplate_file(
+                    filename, refs, regexs, not_generated_files_to_skip)
+            else:
+                print(filename, file=sys.stdout)
 
     return 0
 
+
 if __name__ == "__main__":
-  sys.exit(main())
+    sys.exit(main())
diff --git a/verify/boilerplate/boilerplate.py.txt b/verify/boilerplate/boilerplate.py.txt
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # Copyright YEAR The Kubernetes Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");