first commit
anjz committed Oct 11, 2023
0 parents commit 26bb019
Showing 20 changed files with 1,302 additions and 0 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,21 @@
name: Lint
on:
  push:
    branches: "*"
jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
      - name: Run lint tools
        run: |
          make lint-local
27 changes: 27 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,27 @@
name: Build and Publish Docker Image
on:
  release:
    types: [released]
jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          target: production
          push: false
          tags: |
            "${{ vars.ACR_ADDRESS }}/${{ vars.IMAGE_NAME }}:${{ github.ref_name }}"
      - name: Login to Azure ACR
        uses: azure/docker-login@v1
        with:
          login-server: ${{ vars.ACR_ADDRESS }}
          username: ${{ secrets.ACR_USERNAME }}
          password: ${{ secrets.ACR_PASSWORD }}
      - name: Push Docker image to Azure ACR
        run: docker push ${{ vars.ACR_ADDRESS }}/${{ vars.IMAGE_NAME }}:${{ github.ref_name }}
21 changes: 21 additions & 0 deletions .github/workflows/unittests.yml
@@ -0,0 +1,21 @@
name: Unit tests
on:
  push:
    branches: "*"
jobs:
  unittests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
      - name: Run unit tests
        run: |
          make unittest-local
33 changes: 33 additions & 0 deletions .gitignore
@@ -0,0 +1,33 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
dist/
build/
*.egg-info/

# Virtual environments
venv/
env/
ENV/
.venv/
.ENV/

# IDEs and editors
.idea/
.vscode/
*.sublime-project
*.sublime-workspace

# Logs and databases
*.log
*.sqlite3
*.db

# Other
*.pyc
.DS_Store
.env
.coverage
36 changes: 36 additions & 0 deletions Dockerfile
@@ -0,0 +1,36 @@
FROM python:3.11-alpine as builder
RUN mkdir /install
WORKDIR /install
COPY requirements.txt /requirements.txt
RUN pip install --prefix="/install" -r /requirements.txt


FROM python:3.11-alpine as production
RUN apk upgrade -U && apk add ffmpeg
COPY --from=builder /install /usr/local
COPY ./stream_transcriber /stream_transcriber
WORKDIR /
EXPOSE 8765
EXPOSE 8000
ENTRYPOINT ["python"]
CMD ["-m", "stream_transcriber.server"]

FROM production as base-dev
RUN apk add --no-cache make
COPY ./requirements-dev.txt /requirements-dev.txt
RUN pip install -r /requirements-dev.txt

FROM base-dev as lint
WORKDIR /
COPY ./Makefile /Makefile
COPY ./setup.cfg /setup.cfg
COPY ./stream_transcriber /stream_transcriber
ENTRYPOINT ["make", "lint-local"]

FROM base-dev as unittest
WORKDIR /
COPY ./stream_transcriber /stream_transcriber
COPY ./unittests /unittests
COPY ./Makefile /Makefile
COPY ./pytest.ini /pytest.ini
ENTRYPOINT [ "make", "unittest-local" ]
40 changes: 40 additions & 0 deletions Makefile
@@ -0,0 +1,40 @@
.DEFAULT_GOAL := all
IMAGE_NAME ?= stream-demo-server
TAG ?= manual
DOCKER := DOCKER_BUILDKIT=1 docker
SOURCES := stream_transcriber/
ACR_ADDRESS := speechmatics.azurecr.io
ACR_IMAGE_NAME := ${ACR_ADDRESS}/${IMAGE_NAME}

.PHONY: all lint build publish format build-linux-amd64 lint-local unittest unittest-local

all: lint build

lint:
	${DOCKER} build -t ${IMAGE_NAME}:${TAG}-lint --target lint .
	${DOCKER} run --rm --name ${IMAGE_NAME}-lint ${IMAGE_NAME}:${TAG}-lint

lint-local:
	black --check --diff ${SOURCES}
	pylint ${SOURCES}
	pycodestyle ${SOURCES}

format:
	black ${SOURCES}

unittest:
	${DOCKER} build -t ${IMAGE_NAME}:${TAG}-unittest --target unittest .
	${DOCKER} run --rm --name ${IMAGE_NAME}-unittest ${IMAGE_NAME}:${TAG}-unittest

unittest-local:
	AUTH_TOKEN=token pytest -v unittests

build:
	${DOCKER} build -t ${IMAGE_NAME}:${TAG} --target production .

# Build locally an image for linux/amd64
build-linux-amd64:
	${DOCKER} build --platform linux/amd64 -t ${IMAGE_NAME}:${TAG} --target production .

publish:
	docker tag ${IMAGE_NAME}:${TAG} ${ACR_IMAGE_NAME}:${TAG}
	docker image inspect ${ACR_IMAGE_NAME}:${TAG}
	docker push ${ACR_IMAGE_NAME}:${TAG}
48 changes: 48 additions & 0 deletions README.md
@@ -0,0 +1,48 @@
# Stream Radio Server

A Python WebSocket server for transcribing/translating multiple radio streams, allowing clients to subscribe to the results.

## Getting Started

Install all the required dependencies with:

```bash
brew install ffmpeg
pip3 install -r requirements.txt
```

## Running

Start the server with:

```bash
python3 -m stream_transcriber.server --port 8765
```

Connect a client to e.g. `ws://localhost:8765`. With https://github.com/vi/websocat this can be done as follows:
```bash
websocat ws://127.0.0.1:8765
```
> {"message": "Initialised", "info": "Waiting for message specyifing desired stream url"}
The server expects an initial JSON message with the desired language to start streaming:
```json
{"name": "english"}
```

Now the client will receive audio chunks and messages in JSON format until the stream ends or the client disconnects.
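For reference, a minimal Python client built on the `websockets` package (already listed in `requirements.txt`) might look like the sketch below. It assumes the server sends its JSON messages as text frames and the audio chunks as binary frames; adjust the handling if the server behaves differently.

```python
# Minimal client sketch (not part of the repository). Assumes JSON messages
# arrive as text frames and audio chunks as binary frames.
import asyncio
import json

import websockets


async def listen(language: str = "english") -> None:
    async with websockets.connect("ws://127.0.0.1:8765") as ws:
        # The server greets us first, then waits for the stream selection.
        print(await ws.recv())
        await ws.send(json.dumps({"name": language}))

        async for message in ws:
            if isinstance(message, bytes):
                print(f"audio chunk: {len(message)} bytes")
            else:
                print(f"message: {message}")


if __name__ == "__main__":
    asyncio.run(listen())
```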

## Running tests

Run the following command:

```bash
make unittest
```

The above command runs the tests in a Docker container with the intended Python version and all dependencies installed. To run the tests directly on your machine, use:

```bash
make unittest-local
```
48 changes: 48 additions & 0 deletions profiling/README.md
@@ -0,0 +1,48 @@
# Profiling the server under load

## Dependencies

In addition to the dependencies needed to run the server, you'll need the following:


- CLI tools:
  - k6
  - ffmpeg
- Python packages:
  - memory_profiler
  - matplotlib

## Run profiling

We can collect some statistics while the server is under load:

1. Start the server with `mprof` to record the evolution of memory consumption over time. It also tracks the memory of child processes (ffmpeg):

```bash
SM_MANAGEMENT_PLATFORM_URL='<URL_TO_MGMT_PLATFORM>' AUTH_TOKEN='<API_KEY_HERE>' mprof run --multiprocess python3 -m stream_transcriber.server --port 8765
```

2. A simple way to keep an eye on CPU usage while the server is running is to poll `ps` in another terminal:

```bash
# 1. Find the pid of the server
ps | grep server.py

# 2. Watch snapshots every 1s
watch -n 1 'ps -p <pid> -o %cpu,%mem,cmd'
```

3. Generate some load using [k6](https://k6.io):

```bash
k6 run profiling/client-load.js
```
NOTE: with very high client counts you may hit the maximum number of open file descriptors. Check how to change this limit for your OS; on macOS the current value can be retrieved with `ulimit -n` and changed with `ulimit -n <amount>`.
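For example, a quick way to do this for the current shell session might look like the following sketch (the limit value is only illustrative):

```bash
ulimit -n          # show the current open-file limit
ulimit -n 4096     # raise it for this shell (illustrative value)
k6 run profiling/client-load.js
```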

4. The CPU and memory snapshots will appear every second in the separate terminal.

5. To visualize memory consumption over time, press Ctrl+C in the terminal running the server to stop it, then run:

```bash
mprof plot
```
34 changes: 34 additions & 0 deletions profiling/client-load.js
@@ -0,0 +1,34 @@
import ws from 'k6/ws';
import { check } from 'k6';

export const options = {
  discardResponseBodies: true,
  scenarios: {
    users: {
      executor: "ramping-vus",
      startVUs: 1,
      stages: [
        { duration: '1m', target: 1 },
        { duration: '2m', target: 200 },
        { duration: '5m', target: 200 },
        { duration: '2m', target: 1 },
        { duration: '1m', target: 1 },
      ],
    },
  },
};

export default function () {
  const url = 'ws://127.0.0.1:8765';
  const res = ws.connect(url, function (socket) {
    socket.on('open', function open() {
      console.log('connected');
      const streams = ["english", "german", "french", "spanish"];
      const random = Math.floor(Math.random() * streams.length);
      socket.send(`{"name": "${streams[random]}"}`);
    });
    // socket.on('message', (data) => console.log('Message received: ', data));
    socket.on('close', () => console.log('disconnected'));
  });
  check(res, { 'status is 101': (r) => r && r.status === 101 });
}
4 changes: 4 additions & 0 deletions pytest.ini
@@ -0,0 +1,4 @@
[pytest]
addopts = -ra --full-trace --cov=stream_transcriber --cov-branch -o asyncio_mode=auto
pythonpath = stream_transcriber
testpaths = unittests
6 changes: 6 additions & 0 deletions requirements-dev.txt
@@ -0,0 +1,6 @@
pycodestyle==2.11.0
pylint==3.0.1
black==23.9.1
pytest==7.4.2
pytest-asyncio==0.21.1
pytest-cov==4.1.0
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
websockets~=11.0.3
httpx[http2]~=0.23
polling2~=0.5
toml~=0.10.2
prometheus-client~=0.16.0
speechmatics-python~=1.9.0
2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
[pycodestyle]
max-line-length = 120
Empty file added stream_transcriber/__init__.py
Empty file.