# WIP Investigate failure #27

	name: Optimum TPU / Test TGI on TPU / Jetstream Pytorch

	on:
	push:
	branches: [ main, ci-ephemeral-tpu ]
	pull_request:
	branches: [ main ]
	paths:
	- "text-generation-inference/**"
	# This can be used to trigger workflow from the web interface
	workflow_dispatch:

	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	jobs:
	do-the-job:
	name: Run TGI tests - Jetstream Pytorch
	runs-on: gcp-ct5lp-hightpu-1t-usc1-public-211
	container:
	image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:r2.4.0_3.10_tpuvm
	options: --shm-size "16gb" --ipc host --privileged ${{ vars.SINGLE_TPU_ENV }}
	env:
	PJRT_DEVICE: TPU
	steps:
	- name: Checkout
	uses: actions/checkout@v4

	- name: Build and test TGI server
	run: \|
	make test_installs jetstream_requirements tgi_server
	find text-generation-inference/ -name "text_generation_server-*whl" -exec python -m pip install {} \;
	JETSTREAM_PT=1 HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_TPU_CI }} \
	python -m pytest -sv text-generation-inference/tests -k "jetstream and TinyLlama and greedy"

# Provide feedback