Skip to content

Commit

Permalink
chore: Diff-tool -> Comparing directory files (googleapis#8447)
Browse files Browse the repository at this point in the history
  • Loading branch information
ddixit14 authored and suztomo committed Oct 4, 2022
1 parent 4b56d33 commit 26f9fc5
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 0 deletions.
72 changes: 72 additions & 0 deletions generation/diff_directory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
## This is a helper script invoked by ./generation/diff_files.sh
## All the inputs to this script are provided by diff_files.sh
## You do not need to do anything for this script to run.
## Inputs provided in sequential order are :
## 1. ${mavenURL} -> The URL for artifact's latest version directory on maven central
## 2. ${sonatypeURL} -> The URL for artifact's staging directory on google sonatype
## 3. ${artifactId} -> self-explanatory
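## 4. ${groupId} -> despite the name, diff_files.sh passes the Sonatype staging repository ID here (e.g. comgooglecloud-5570), not the Maven groupId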
## output for this script are 2 files:
# 1. diff-files-summary.txt : This will show success for artifacts which have same files on maven-central and sonatype,
# and if they differ, it will show a diff failure message along with the names of the files that differ.
# 2. total-diff.txt : For every artifact, this will show 4 things:
# a. Sonatype directory URL
# b. Files that exist on sonatype (with version omitted, since we only care about the file generated)
# c. Maven directory URL
# d. Files that exist on Maven (with version omitted, since we only care about the file generated)

# Compares the file listing of one artifact on Maven Central with the listing
# of the same artifact in a Sonatype staging repository.
#
# Arguments (all supplied by diff_files.sh):
#   $1 mavenCentralURL - artifact's latest-version directory on Maven Central
#   $2 sonatypeURL     - artifact's directory in the Sonatype staging repository
#   $3 artifactId      - artifact name, used only to label the reports
#   $4 groupId         - historical name; the caller passes the staging
#                        repository ID (e.g. comgooglecloud-5570)
# Outputs (appended to, in the current directory):
#   total-diff.txt, diff-files-summary.txt
# Returns: 0 after writing the reports, 2 when the argument count is wrong.

# Scratch directory holding the downloaded listings; removed by cleanup().
work_dir=''

# Deletes the scratch directory, if one was created.
cleanup() {
  if [[ -n "${work_dir}" ]]; then
    rm -rf -- "${work_dir}"
  fi
}

# Reads HTML on stdin and prints, for every line that has one, the value of
# the last href="..." attribute on that line.
extract_hrefs() {
  sed -n 's/.*href="\([^"]*\).*/\1/p'
}

# Removes every digit from stdin so version numbers never show up as a diff.
strip_digits() {
  sed 's/[0-9]*//g'
}

# Reads Sonatype hrefs on stdin and prints the version-less file names of
# those that mention the staging repository ID $1.
# The file name is the 12th '/'-separated field of the absolute href, or the
# 13th for the "api" repository, whose group path (com/google/api/grpc) is one
# directory deeper.
sonatype_file_names() {
  local repo_id=$1
  local name_field=12
  if [[ "${repo_id}" = *api* ]]; then
    name_field=13
  fi
  # The ID is matched as a literal substring (passed via -v) instead of being
  # spliced into the awk program as a regular expression.
  awk -F '/' -v id="${repo_id}" -v col="${name_field}" \
    'index($0, id) { print $col }' \
    | strip_digits
}

# Reads Maven Central hrefs on stdin and prints the version-less file names.
# The first href is the "../" parent-directory link and is dropped. This used
# to be done with `sed -i '' '1d'`, which only works with BSD sed; a plain
# filter behaves the same with GNU and BSD sed.
maven_file_names() {
  strip_digits | sed '1d'
}

main() {
  if (( $# != 4 )); then
    printf 'Usage: %s <mavenCentralURL> <sonatypeURL> <artifactId> <stagingRepoId>\n' \
      "${0##*/}" >&2
    return 2
  fi

  local mavenCentralURL=$1
  local sonatypeURL=$2
  local artifactId=$3
  local groupId=$4

  work_dir=$(mktemp -d) || {
    printf 'error: mktemp failed\n' >&2
    return 1
  }
  trap cleanup EXIT

  local sonatype_page="${work_dir}/sonatypeFile"
  local maven_page="${work_dir}/mavenFile"
  local final_sonatype="${work_dir}/finalSonatype.txt"
  local final_maven="${work_dir}/finalMaven.txt"

  # NOTE(review): --recursive together with -O appends everything wget fetches
  # to the single output file; only the href attributes are used below. The
  # flags are kept as they were - confirm whether recursion is really needed.
  wget -O "${sonatype_page}" --recursive -nd --no-parent "${sonatypeURL}" \
    || printf 'warning: could not fetch %s\n' "${sonatypeURL}" >&2

  # Why the User-Agent header? Maven Central denies CLI requests to browse a
  # directory URL, so a browser is imitated.
  # The original command also passed "--referer --recursive". --referer takes
  # a value, so wget consumed "--recursive" as the Referer string and never
  # recursed; both are dropped, which keeps the single-page fetch without the
  # bogus header.
  wget -O "${maven_page}" -nd --no-parent \
    --header="User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" \
    "${mavenCentralURL}" \
    || printf 'warning: could not fetch %s\n' "${mavenCentralURL}" >&2

  # '>' (not '>>') guarantees both name lists exist, even when empty, and can
  # never contain leftovers from an earlier run.
  extract_hrefs <"${sonatype_page}" | sonatype_file_names "${groupId}" >"${final_sonatype}"
  extract_hrefs <"${maven_page}" | maven_file_names >"${final_maven}"

  {
    printf '%s\n' "###################################################################################################################################"
    printf '%s\n' "----${artifactId} Sonatype files : ${sonatypeURL}"
    cat "${final_sonatype}"
    printf '%s\n' "----${artifactId} Maven files : ${mavenCentralURL}"
    cat "${final_maven}"
  } >>total-diff.txt

  local differences
  {
    printf '%s\n' "--------------------------------------------------------------------------------------------"
    # A non-zero diff status is an expected outcome here, not an error.
    if differences=$(diff "${final_maven}" "${final_sonatype}"); then
      printf '%s\n' "${artifactId} File match success"
    else
      printf '%s\n' "---------------------------------^NEW-DIFF-FOUND^-----------------------------------------"
      printf '%s\n' "${artifactId} diff:"
      if [[ -n "${differences}" ]]; then
        printf '%s\n' "${differences}"
      fi
    fi
  } >>diff-files-summary.txt

  return 0
}

main "$@"
102 changes: 102 additions & 0 deletions generation/diff_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/bin/bash
#run this script from google-cloud-java directory
# google-cloud-java$ ./generation/diff_files.sh
# not using set -e because failing diff command is ok
# latest repositories Id -> comgooglecloud-5570, comgoogleapi-5038, comgoogleanalytics-1052
# this script must be run on a branch which does not have any snapshot versions in it

## HOW TO USE THIS SCRIPT ##
# 1. Run the stage job for google-cloud-java, on a branch which does not have any snapshot versions in it.
# 2. Search the stage job logs for
# a. 'comgooglecloud' (the result will be like "comgooglecloud-5570")
# b. 'comgoogleapi' (the result will be like "comgoogleapi-5038")
# c. 'comgoogleanalytics' (the result will be like "comgoogleanalytics-1052")
# 3. Update them below under the variables `cloudRepoId`, `apiRepoId`, `analyticsRepoId`
# 4. Run ./generation/diff_files.sh

# Output of this script are 2 files (actually generated by diff_directory.sh) ->
# 1. diff-files-summary.txt : This will show success for artifacts which have same files on maven-central and sonatype,
# and if they differ, it will show a diff failure message along with the names of the files that differ.
# 2. total-diff.txt : For every artifact, this will show 4 things:
# a. Sonatype directory URL
# b. Files that exist on sonatype (with version omitted, since we only care about the file generated)
# c. Maven directory URL
# d. Files that exist on Maven (with version omitted, since we only care about the file generated)
# This script calls ./generation/diff_directory.sh once for every Maven/Sonatype directory pair; you do not need to invoke it yourself. Just run this script.
# Search total-diff.txt for an artifact ID to see the complete list of files found for that artifact on both Sonatype and Maven Central.

# Staging repository IDs taken from the stage job logs; update before running.
cloudRepoId=comgooglecloud-5570
apiRepoId=comgoogleapi-5038
analyticsRepoId=comgoogleanalytics-1052

# Succeeds (returns 0) when the module directory $1 must not be compared.
should_skip_module() {
  case "$1" in
    *google-cloud-gapic-bom | *CoverageAggregator)
      return 0
      ;;
    *samples*)
      return 0
      ;;
    *beyondcorp*)
      # these modules do not exist on maven-central yet
      return 0
      ;;
  esac
  return 1
}

# Prints the text enclosed by <$1>...</$1> on the first line of file $2 that
# mentions $1. Any indentation before the opening tag and any text after the
# closing tag are ignored, so the result does not depend on how the pom is
# indented.
first_tag_value() {
  local tag=$1
  local file=$2
  grep --max-count=1 -- "${tag}" "${file}" \
    | sed -e "s:.*<${tag}>::" -e "s:</${tag}>.*::"
}

# Reads one XML line on stdin and prints the text between its first '>' and
# the next '<', e.g. "  <latest>1.2.3</latest>" -> "1.2.3".
element_text() {
  cut -d '>' -f 2 | cut -d '<' -f 1
}

# Looks up the latest released version of an artifact on Maven Central and
# hands the Maven/Sonatype directory pair to diff_directory.sh.
# Arguments:
#   $1 - group path below the repository root (e.g. com/google/cloud)
#   $2 - Sonatype staging repository ID
#   $3 - artifactId
#   $4 - version staged on Sonatype
compare_staged_artifact() {
  local group_path=$1
  local repo_id=$2
  local artifact_id=$3
  local staged_version=$4

  local maven_base="https://repo1.maven.org/maven2/${group_path}/${artifact_id}"
  local maven_latest_version
  maven_latest_version=$(curl -s "${maven_base}/maven-metadata.xml" \
    | grep --max-count=1 'latest' \
    | element_text)
  if [[ -z "${maven_latest_version}" ]]; then
    # Still run the comparison so the artifact appears in the reports, but
    # make it visible that the Maven side is not a real version directory.
    printf 'warning: no latest version found on Maven Central for %s\n' \
      "${artifact_id}" >&2
  fi

  local sonatypeURL="https://google.oss.sonatype.org/content/repositories/${repo_id}/${group_path}/${artifact_id}/${staged_version}"
  local mavenURL="${maven_base}/${maven_latest_version}"

  # Quoted so that an empty value cannot shift the positional arguments.
  ./generation/diff_directory.sh "${mavenURL}" "${sonatypeURL}" "${artifact_id}" "${repo_id}"
}

main() {
  local pom_file module groupId artifactId version

  # -maxdepth 2 would only visit the packages (java-vision etc.), not the maven
  # submodules within them; -maxdepth 3 and 4 yield the same number of modules,
  # so 3 covers all of them.
  # The list is read on fd 3 so that commands in the loop body cannot consume
  # it through stdin, and line by line so that paths are never word-split.
  while IFS= read -r pom_file <&3; do
    module=${pom_file%/*}

    if should_skip_module "${module}"; then
      continue
    fi

    groupId=$(first_tag_value groupId "${pom_file}")
    artifactId=$(first_tag_value artifactId "${pom_file}")
    # The version is the element text on the line carrying the
    # x-version-update marker.
    version=$(grep --max-count=1 'x-version-update' "${pom_file}" | element_text)

    if [[ "${groupId}" == *grafeas* ]]; then
      continue
    fi

    # The three checks are deliberately independent (not elif), as before.
    if [[ "${groupId}" == *google.cloud* ]]; then
      compare_staged_artifact "com/google/cloud" "${cloudRepoId}" "${artifactId}" "${version}"
    fi

    if [[ "${groupId}" == *grpc* ]]; then
      compare_staged_artifact "com/google/api/grpc" "${apiRepoId}" "${artifactId}" "${version}"
    fi

    if [[ "${groupId}" == *analytics* ]]; then
      compare_staged_artifact "com/google/analytics" "${analyticsRepoId}" "${artifactId}" "${version}"
    fi
  done 3< <(find . -mindepth 1 -maxdepth 3 -name pom.xml | sort)
}

main "$@"

0 comments on commit 26f9fc5

Please sign in to comment.