Skip to content

Commit

Permalink
Merge branch 'nemo2_batcheval' into huiyingl/nemo2sftpeft_notebook
Browse files Browse the repository at this point in the history
Signed-off-by: HuiyingLi <willwin.lee@gmail.com>
  • Loading branch information
HuiyingLi committed Nov 14, 2024
2 parents 5f92b9d + e1b2cfb commit 42e4e9c
Show file tree
Hide file tree
Showing 406 changed files with 240,524 additions and 5,371 deletions.
16 changes: 14 additions & 2 deletions .github/workflows/_test_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,34 @@ on:
log:
description: Last 2000 characters of the test step's log
value: ${{ jobs.main.outputs.log }}
potential_infra_failure:
description: Boolean flag when infra-related keyword spotted in logs.
value: ${{ jobs.main.outputs.potential_infra_failure }}
jobs:

main:
runs-on: ${{ inputs.RUNNER }}
outputs:
conclusion: ${{ steps.main.conclusion }}
log: ${{ steps.main.outputs.log }}
potential_infra_failure: ${{ steps.main.outputs.potential_infra_failure }}
steps:
- name: Docker system cleanup
run: |
docker system prune -a --filter "until=48h" --force || true
- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo_container_${{ github.run_id }}
docker pull nemoci.azurecr.io/nemo_container:${{ github.run_id }}
- name: Start container
run: |
docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
ARG=("")
if [[ "${{ inputs.RUNNER }}" != *cpu* ]]; then
ARG=("--runtime=nvidia --gpus all")
fi
docker run --rm -d --name nemo_container_${{ github.run_id }} ${ARG[@]} --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
- id: main
name: Run main script
Expand All @@ -70,6 +79,9 @@ jobs:
echo "log=$(tail -c 2000 err.log | base64 -w 0)" >> "$GITHUB_OUTPUT"
potential_infra_failure=$(cat err.log | grep -Eqiw "device" && echo true || echo false)
echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT"
exit $EXIT_CODE
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/build-test-publish-wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Build, test, and publish a PyPi wheel (to testpypi)

on:
push:
branches:
- main
- 'r**'

defaults:
run:
shell: bash -x -e -u -o pipefail {0}

jobs:
build-test-publish-wheel:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.7.0
with:
image-name: nemo_container
dockerfile: Dockerfile.ci
image-label: nemo-core
build-args: |
IMAGE_LABEL=nemo-core
prune-filter-timerange: 24h
dry-run: true
python-package: nemo
container-workdir: /workspace
environment: public
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
136 changes: 6 additions & 130 deletions .github/workflows/cherry-pick-release-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,133 +6,9 @@ on:
- main

jobs:
main:
runs-on: ubuntu-latest
environment:
name: main
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ secrets.PAT }}


- name: Cherry pick
env:
GH_TOKEN: ${{ secrets.PAT }}
run: |
set -x
set +e
git config --global user.email "nemo-bot@nvidia.com"
git config --global user.name "NeMo Bot"
SHA=$(git rev-list --no-merges -n 1 HEAD)
MESSAGE=$(git log -n 1 --pretty=format:%s $SHA)
PR_ID=$(echo $MESSAGE | awk -F'#' '{print $2}' | awk -F')' '{print $1}' )
USERNAME=$(git log -n 1 --pretty=format:%ae $SHA | awk -F'@' '{print $1}')
PR=$(curl -L \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls/$PR_ID)
PR_TITLE=$(echo -E $PR | jq '.title' | tr -d '"')
LABELS=$(echo -E $PR | jq '.labels | [.[].name] | join(",")' | tr -d '"')
AUTHOR=$(echo -E $PR | jq '.user.login' | tr -d '"')
TARGET_BRANCHES=$(echo "$LABELS" | grep -o 'r[^,]*')
if [[ $TARGET_BRANCHES == '' ]]; then
echo Nothing to cherry-pick
exit 0
fi

echo $TARGET_BRANCHES | while read -r RELEASE_BRANCH ; do
TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$RELEASE_BRANCH)" != "" ]] && echo true || echo false)

if [[ "$TARGET_BRANCH_EXISTS_OK" == "false" ]]; then
echo Release branch does not yet exist, will not cherry-pick
continue
fi

(
git fetch origin $RELEASE_BRANCH:$RELEASE_BRANCH
git switch --force-create cherry-pick-$PR_ID-$RELEASE_BRANCH $RELEASE_BRANCH
git cherry-pick $SHA
git push -u origin --force cherry-pick-$PR_ID-$RELEASE_BRANCH
git checkout ${CI_DEFAULT_BRANCH:-main}
)

CHERRYPICK_SUCCESSFUL=$?

if [[ $CHERRYPICK_SUCCESSFUL -eq 0 ]]; then
PR_URL="https://github.com/NVIDIA/NeMo/pull/$PR_ID"

PAYLOAD=$(jq \
-n \
-c \
--arg TITLE "Cherry pick \`$PR_TITLE ($PR_ID)\` into \`$RELEASE_BRANCH\`" \
--arg HEAD "cherry-pick-$PR_ID-$RELEASE_BRANCH" \
--arg RELEASE_BRANCH "$RELEASE_BRANCH" \
--arg BODY "[🤖]: Hi @$AUTHOR 👋,<br><br>we've cherry picked #$PR_ID into \`$RELEASE_BRANCH\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!" \
'{
"title": $TITLE,
"head": $HEAD,
"base": $RELEASE_BRANCH,
"body": $BODY
}'
)

NEW_PR=$(curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls \
-d "$PAYLOAD")

NEW_PR_ID=$(echo -E $NEW_PR | jq '.number')
curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/pulls/$NEW_PR_ID/requested_reviewers \
-d '{"reviewers":["'$AUTHOR'"]}'

curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GH_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/issues/$NEW_PR_ID/labels \
-d '{"labels":["Run CICD", "cherry-pick"]}'

else
URL="https://github.com/NVIDIA/NeMo/pull/$PR_ID"

MESSAGE='{
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ":alert: Cherrypick bot 🤖: Hey <@'$USERNAME'>: Cherry-pick of <'$URL'|#'$PR_ID'> failed (3-way merge impossible). Please resolve manually and create a PR.\n\ncc: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>"
}
}
]
}'

curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${{ secrets.SLACK_WEBHOOK }}

fi

done



env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cherry-pick:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.6.0
secrets:
PAT: ${{ secrets.PAT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
Loading

0 comments on commit 42e4e9c

Please sign in to comment.