Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: generate alt-text with ai #2875

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: generate alt-text with ai
Shinigami92 committed May 28, 2024
commit 83bcf82f0e5f8f418047cb3b77b675a472500715
78 changes: 78 additions & 0 deletions components/publish/PublishAttachment.vue
Original file line number Diff line number Diff line change
@@ -21,6 +21,74 @@ const maxDescriptionLength = 1500
// Flag for the description edit dialog; `toggleApply` resets it to `false` when the description is applied.
const isEditDialogOpen = ref(false)
// Locally edited description, seeded from the attachment's current description (empty string when it has none).
const description = ref(props.attachment.description ?? '')

// `true` while `generateAltText` is running: blocks re-entrant calls and drives the
// disabled state and spinner of the "generate alt text" button.
const generationInProgress = ref(false)

/**
 * Generates an alt-text suggestion for the current attachment with an in-browser
 * image-captioning model and writes it into `description`.
 *
 * Does nothing when the attachment has no URL or when a generation is already running.
 * `generationInProgress` is `true` for the whole duration of the run and is always
 * reset afterwards, including on failure. Errors are logged and not rethrown.
 */
async function generateAltText() {
  const url = props.attachment.url

  if (!url || generationInProgress.value)
    return

  // TODO @Shinigami92 2024-05-27: Show confirm dialog warning message that a model with ~250MiB will be downloaded

  generationInProgress.value = true

  try {
    const { pipeline, RawImage } = await import('@xenova/transformers')

    const pipe = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning')

    const imageElement = new Image()
    imageElement.crossOrigin = 'Anonymous'
    // The extra query parameter makes this a distinct URL from the one already
    // displayed, so the image is requested again, this time with CORS headers.
    // Use `&` when the URL already carries a query string.
    imageElement.src = `${url}${url.includes('?') ? '&' : '?'}request-with-cors`

    // `decode()` resolves only once the image is fully loaded and decoded, so it
    // can be drawn right away. Waiting for `onload` after this point would hang
    // forever, because the load event has already fired.
    await imageElement.decode()

    const canvas = document.createElement('canvas')
    canvas.width = imageElement.width
    canvas.height = imageElement.height

    const ctx = canvas.getContext('2d')
    if (!ctx)
      throw new Error('Unable to acquire a 2D canvas context')

    ctx.drawImage(imageElement, 0, 0)

    // TODO @Shinigami92 2024-05-28: Fix "Uncaught DOMException: Failed to execute 'toDataURL' on 'HTMLCanvasElement': Tainted canvases may not be exported."
    // An unsupported MIME type makes `toDataURL` fall back to PNG.
    const dataUrl = canvas.toDataURL(`image/${url.split('.').pop()!}`)

    const img = await RawImage.fromURL(dataUrl)

    const out = await pipe(img)

    // eslint-disable-next-line no-console
    console.debug(out)

    const firstOut = out?.[0]

    // Only a single, flat result carries a caption we can use.
    if (!firstOut || Array.isArray(firstOut))
      return

    description.value = firstOut.generated_text
  }
  catch (error) {
    console.error(error)
    // TODO @Shinigami92 2024-05-27: Display error message to the user, so they know that something went wrong
  }
  finally {
    generationInProgress.value = false
  }
}

function toggleApply() {
isEditDialogOpen.value = false
emit('setDescription', description.value)
@@ -62,6 +130,16 @@ function toggleApply() {
<div flex flex-row-reverse>
<PublishCharacterCounter :length="description.length" :max="maxDescriptionLength" />
</div>

<!-- TODO @Shinigami92 2024-05-27: Style the button in the upper right corner of the textarea -->
<button type="button" btn-outline flex="~ gap2 center" :disabled="generationInProgress" @click="generateAltText">
<span block i-ri:sparkling-2-line />
{{ $t('action.generate-alt-text') }}
<span v-if="generationInProgress" aria-hidden="true" block animate-spin preserve-3d>
<span block i-ri:loader-2-fill aria-hidden="true" />
</span>
</button>

<button btn-outline :disabled="description.length > maxDescriptionLength" @click="toggleApply">
{{ $t('action.apply') }}
</button>
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -66,6 +66,7 @@
"@vueuse/math": "^10.8.0",
"@vueuse/motion": "2.1.0",
"@vueuse/nuxt": "^10.8.0",
"@xenova/transformers": "^2.17.1",
"blurhash": "^2.0.5",
"browser-fs-access": "^0.35.0",
"chroma-js": "^2.4.2",
Loading