Capturing changes to pre-commit and testing commit hook correction. Related to #3297. Commit ready to merge.

- Legacy-Id: 19056
This commit is contained in:
Robert Sparks 2021-06-02 15:00:55 +00:00
parent ffd82f1ce8
commit 14df71e4e7

View file

@ -1,162 +1,69 @@
#!/usr/bin/env python
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# $Id$
#
# Prevents some SHA-1 collisions from being committed
# Test for the 320 byte prefix found on https://shattered.io/
# If the files are committed in the same transaction, svnlook
# will error out itself due to the apparent corruption in the
# candidate revision
"""
An SVN pre-commit hook which requires that commits either are marked as
whitespace cleanup commits, and contain no non-whitespace changes, or
leave whitespace alone on lines without code changes.
"""
REPOS="$1"
TXN="$2"
SVNLOOK=/usr/bin/svnlook
YEAR=$(date +%Y)
import os
import sys
import difflib
#import debug
from pysvn import Client, Transaction
$SVNLOOK changed -t "$TXN" "$REPOS"
if [ $? -ne 0 ]; then
echo "svnlook failed, possible SHA-1 collision" >&2
exit 2
fi
# Name of this script (basename of argv[0]); die() uses it as the error-message prefix.
prog = os.path.basename(sys.argv[0])
FILES=$($SVNLOOK changed -t "$TXN" "$REPOS" | grep -Ev '^D ' | /usr/bin/awk '{print $2}')
for FILE in $FILES; do
if [ -f $FILE ]; then
# Check against known sha-1 collision attack. Someone committing 2 different files with this
# known hash collision could otherwise break the repository.
PREFIX=$($SVNLOOK cat -t "$TXN" "$REPOS" "$FILE" | head -c320 | /usr/bin/sha1sum | cut -c-40)
if [ "$PREFIX" = 'f92d74e3874587aaf443d1db961d4e26dde13e9c' ]; then
echo "known SHA-1 collision rejected" >&2
exit 3
fi
def die(msg):
    """Write an error report to stderr and terminate with exit status 1.

    The report is prefixed with the module-level ``prog`` name so the
    committer can see which hook rejected the transaction.
    """
    report = "\n%s: Error: %s\n" % (prog, msg)
    sys.stderr.write(report)
    sys.exit(1)
# Verify copyright year
if [[ $FILE == */ietf/*.py || -s $FILE ]]; then
$SVNLOOK cat -t "$TXN" "$REPOS" "$FILE" | head -n 3 | grep -q "Copyright .*IETF Trust .*$YEAR.*" || {
echo "
Bad or missing copyright note in $FILE.
Expected 'Copyright The IETF Trust ... $YEAR, All Rights Reserved',
(or similar) at the start of the file.
# No command-line arguments at all: abort with a usage error.
if len(sys.argv) <= 1:
die("Expected arguments: REPOSITORY TRANSACTION, found none")
For bulk correction of copyright statements, try bin/check-copyright with
patching:
# Only one argument given: abort and echo the argument that was received.
if len(sys.argv) <= 2:
die( "Expected arguments: REPOSITORY TRANSACTION, found only '%s'" % sys.argv[1])
\$ bin/check-copyright -p \$(svn st | cut -c 9- | grep '\.py\$' ) | patch -p0
# Positional arguments: first the repository, then the transaction name.
repo = sys.argv[1]
txname = sys.argv[2]
# `tx` is the pysvn Transaction for (repo, txname); `client` is used further
# down to fetch the repository version of each changed file via a file:// URL.
tx = Transaction(repo, txname)
client = Client()
# True when the lower-cased svn:log property of the transaction contains the
# phrase "whitespace cleanup".
is_whitespace_cleanup = "whitespace cleanup" in tx.revpropget("svn:log").lower()
def normalize(s):
    """Return *s* with trailing whitespace removed and tabs expanded to spaces."""
    without_trailing = s.rstrip()
    return without_trailing.expandtabs()
def normalize_sequence(seq):
    """Return a new list holding ``normalize(item)`` for every item of *seq*."""
    return [normalize(item) for item in seq]
def normalize_file_end(seq):
    """Drop trailing blank lines from *seq* in place and return *seq*.

    A line counts as blank when stripping its whitespace leaves the empty
    string. Blank lines that are followed by a non-blank line are kept.
    """
    while seq:
        if seq[-1].strip() != "":
            break
        seq.pop()
    return seq
def count(gen):
    """Return the number of items produced by iterating *gen* (consumes it)."""
    total = 0
    for total, _ in enumerate(gen, 1):
        pass
    return total
# Function with side effects. Acts on the global variables `a` and `b`.
def inc_ab(flag):
    """Advance the global counters ``a`` and ``b`` according to a diff mark.

    ``' '`` bumps both counters, ``'-'`` bumps only ``a``, ``'+'`` bumps only
    ``b`` and ``'?'`` leaves both untouched. Any other mark raises ValueError.
    """
    global a, b
    if flag not in (' ', '-', '+', '?'):
        # NOTE(review): the message reads the globals `plain_diff` and `i`,
        # which are not set by this function -- confirm they are valid here.
        raise ValueError("Unexpected ndiff mark: '%s' in: %s" % (flag, plain_diff[i]))
    if flag in (' ', '-'):
        a += 1
    if flag in (' ', '+'):
        b += 1
def get_chunks(unidiff):
    """Split a unified diff (list of lines) into its header and its hunks.

    Returns ``(intro, chunks)``: *intro* is the first two lines of the diff,
    and *chunks* is a list of line lists, each starting at a line that begins
    with ``@@``. An empty or falsy *unidiff* yields ``([], [])``.
    """
    if not unidiff:
        return [], []
    header, body = unidiff[0:2], unidiff[2:]
    hunks = []
    current = []
    for entry in body:
        if not entry.startswith("@@"):
            current.append(entry)
            continue
        # A new hunk header: flush whatever was collected so far.
        if current:
            hunks.append(current)
        current = [entry]
    # The last collected hunk is always appended, even when it is empty.
    hunks.append(current)
    return header, hunks
# Main check: for every changed file in the transaction, compare a plain diff
# against a whitespace-normalized diff and record the files whose changes do
# not match how the commit was marked (whitespace cleanup or not).
changes = tx.changed()
# Maps path -> list of diff lines to show in the rejection message.
issues = {}
# NOTE(review): `context` is assigned here but not read in the code visible below.
context = 3
for path in changes:
action, kind, mod, propmod = changes[path]
# Don't try to diff added or deleted files, only changed text files
if not (mod and action == "R"):
continue
# Don't try to diff binary files
mimetype = tx.propget("svn:mime-type", path)
if mimetype and not mimetype.startswith("text/"):
continue
# `new` is the file content in the transaction, `old` the content fetched
# from the repository through a file:// URL.
new = tx.cat(path).splitlines()
old = client.cat("file://"+os.path.join(repo,path)).splitlines()
# Added trailing space can mess up the comparison -- eliminate it
new = normalize_file_end(new)
old = normalize_file_end(old)
# Diff of the lines as they are (only trailing blank lines removed).
plain_diff = list(difflib.unified_diff(old, new, "%s (repository)"%path, "%s (commit)"%path, lineterm="" ))
# Diff of the same lines after normalize_sequence() has been applied to both sides.
old = normalize_sequence(old)
new = normalize_sequence(new)
white_diff = list(difflib.unified_diff(old, new, "%s (repository)"%path, "%s (commit)"%path, lineterm=""))
plain_count = len(plain_diff)
white_count = len(white_diff)
# for i in range(len(white_diff)):
# sys.stderr.write("%-80s | %-80s\n" % (normalize(plain_diff[i][:80]), normalize(white_diff[i][:80])))
# Case 1: the two diffs differ in length although the commit is not marked as a
# whitespace cleanup. Report the plain-diff chunks that have no identical
# counterpart among the normalized-diff chunks.
if white_count != plain_count and not is_whitespace_cleanup:
intro, plain_chunks = get_chunks(plain_diff)
intro, white_chunks = get_chunks(white_diff)
# Indices of plain chunks that also occur verbatim in the normalized diff.
deletes = []
for chunk in white_chunks:
for i in range(len(plain_chunks)):
if chunk == plain_chunks[i]:
deletes += [i]
# NOTE(review): indices are collected in match order and then reversed, not
# sorted; deleting by index relies on them ending up in descending order and
# free of duplicates -- confirm.
deletes.reverse()
for i in deletes:
del plain_chunks[i]
# `issue` is the same list object as `intro`; the += below extends it in place.
issue = intro
for chunk in plain_chunks:
issue += chunk
# Wording helpers for the final message. They are overwritten for every
# offending file, so the message uses the values from the last one.
# NOTE(review): `an` is not used by the messages visible below.
if len(plain_chunks) > 1:
are = "are"; s = "s"; an = ""
else:
are = "is"; s = ""; an = "an "
issues[path] = issue
# Case 2: the commit is marked as a whitespace cleanup but the normalized diff
# is not empty. Report the whole normalized diff for this file.
if white_count != 0 and is_whitespace_cleanup:
intro, white_chunks = get_chunks(white_diff)
if len(white_chunks) > 1:
are = "are"; s = "s"; an = ""
else:
are = "is"; s = ""; an = "an "
issues[path] = white_diff
# Reject the commit through die() when any file was flagged; the text depends
# on whether the commit was marked as a whitespace cleanup.
if issues:
if is_whitespace_cleanup:
die("It looks as if there are non-whitespace changes in\n"
"this commit, but it was marked as a whitespace cleanup commit.\n\n"
"Here %s the diff chunk%s with unexpected change%s:\n\n%s\n\n"
"Declining the commit due to a mix of code and spaces-only changes. Please\n"
"avoid mixing whitespace-only changes with code changes. See details above." %
(are, s, s, '\n\n'.join([ '\n'.join(issues[path]) for path in issues ]))
)
else:
die("It looks as if there are spaces-only changes in this\n"
"commit, but it was not marked as a whitespace cleanup commit.\n\n"
"Here %s the diff chunk%s with unexpected change%s:\n\n%s\n\n"
"Declining the commit due to a mix of code and spaces-only changes. Please\n"
"avoid mixing whitespace-only changes with code changes. See details above." %
(are, s, s, '\n\n'.join([ '\n'.join(issues[path]) for path in issues ]))
)
# Nothing was flagged: exit with status 0.
sys.exit(0)
" >&2
exit 3
}
fi
fi
done