diff --git a/README.md b/README.md
index 9548891d..8b040ba7 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,11 @@ An audio/video toolkit built with pure web technologies, planned to include crea
 
 ## Motivation
 
-Chrome 94 released the WebCodecs API, meaning JS can now handle audio/video as well. Before this, frontend developers could only use ffmpeg.wasm in limited scenarios. Through simple tests of decoding and re-encoding videos, it was found WebCodecs has 20x the performance of ffmpeg.wasm.
+Chrome 94 released the WebCodecs API, meaning JS can now handle audio/video as well. Before this, frontend developers could only use ffmpeg.wasm in limited scenarios. Through simple tests of decoding and re-encoding videos, it was found WebCodecs has 20x the performance of ffmpeg.wasm.
+_WebAV is compatible with Chrome 102 and above due to the use of the new OPFS API._
 
-Chrome 94 开放了 WebCodecs API,意味着 JS 也能处理音视频了。在此之前,前端开发在会在有限的场景使用 ffmpeg.wasm,经过简单地测试解码和重编码视频发现,WebCodecs 性能是 ffmpeg.wasm 的 20 倍。
+Chrome 94 开放了 WebCodecs API,意味着 JS 也能处理音视频了。在此之前,前端开发会在有限的场景使用 ffmpeg.wasm,经过简单地测试解码和重编码视频发现,WebCodecs 性能是 ffmpeg.wasm 的 20 倍。
+_WebAV 兼容 Chrome 102 及以上版本,因为使用 OPFS 新 API。_
 
 This is an experimental project attempting to provide easy-to-use APIs for handling audio/video data in the browser. The project is under active development, feel free to open issues to report bugs or suggest new features.
diff --git a/packages/av-cliper/demo/concat-media.ts b/packages/av-cliper/demo/concat-media.ts
index c04db1d2..6c82b472 100644
--- a/packages/av-cliper/demo/concat-media.ts
+++ b/packages/av-cliper/demo/concat-media.ts
@@ -25,6 +25,7 @@ document.querySelector('#mp4-img')?.addEventListener('click', () => {
   const spr1 = new OffscreenSprite(
     new MP4Clip((await fetch(resList[0])).body!),
   );
+  spr1.time.duration = 3e6;
 
   const spr2 = new OffscreenSprite(
     new ImgClip(
@@ -69,6 +70,7 @@ document.querySelector('#mp4-img')?.addEventListener('click', () => {
     videoCodec: 'avc1.42E032',
     bgColor: 'white',
     // audio: false,
+    metaDataTags: { hello: 'world' },
   });
 
   await com.addSprite(spr1, { main: true });
diff --git a/packages/av-cliper/demo/decode-media.ts b/packages/av-cliper/demo/decode-media.ts
index d262512e..e323dd02 100644
--- a/packages/av-cliper/demo/decode-media.ts
+++ b/packages/av-cliper/demo/decode-media.ts
@@ -114,7 +114,7 @@ document.querySelector('#decode-audio')?.addEventListener('click', () => {
 });
 
 const videos = {
-  'bunny.mp4': './video/bunny.mp4',
+  'bunny.mp4': './video/pri-test-metadata.mp4',
   'bear.mp4': './video/bear-vp9.mp4',
 };
 document.querySelector('#decode-video')?.addEventListener('click', () => {
diff --git a/packages/av-cliper/src/combinator.ts b/packages/av-cliper/src/combinator.ts
index dc71dfca..9088c98c 100644
--- a/packages/av-cliper/src/combinator.ts
+++ b/packages/av-cliper/src/combinator.ts
@@ -15,6 +15,10 @@ interface ICombinatorOpts {
    * false 合成的视频文件中排除音轨
    */
   audio?: false;
+  /**
+   * Write meta tags into the output video file
+   */
+  metaDataTags?: Record<string, string>;
 }
 
 let COM_ID = 0;
@@ -133,6 +137,7 @@ export class Combinator {
               sampleRate: DEFAULT_AUDIO_CONF.sampleRate,
               channelCount: DEFAULT_AUDIO_CONF.channelCount,
             },
+      metaDataTags: opts.metaDataTags,
     });
 
     TOTAL_COM_ENCODE_QSIZE.set(this, this.#remux.getEecodeQueueSize);
diff --git a/packages/av-cliper/src/mp4-utils/index.ts b/packages/av-cliper/src/mp4-utils/index.ts
index b0cd6cca..3305b820 100644
--- a/packages/av-cliper/src/mp4-utils/index.ts
+++ b/packages/av-cliper/src/mp4-utils/index.ts
@@ -19,6 +19,7 @@ import { EventTool } from '../event-tool';
 import { SampleTransform } from './sample-transform';
 import { extractFileConfig, unsafeReleaseMP4BoxFile } from './mp4box-utils';
 import { tmpfile, write } from 'opfs-tools';
+import { createMetaBox } from './meta-box';
 
 type TCleanFn = () => void;
 
@@ -35,6 +36,7 @@ interface IRecodeMuxOpts {
     sampleRate: number;
     channelCount: number;
   } | null;
+  metaDataTags?: Record<string, string>;
 }
 
 export function recodemux(opts: IRecodeMuxOpts): {
@@ -52,6 +54,25 @@ export function recodemux(opts: IRecodeMuxOpts): {
   const avSyncEvtTool = new EventTool<
     Record<'VideoReady' | 'AudioReady', () => void>
   >();
+
+  let metaAdded = false;
+  const addMetadata = () => {
+    if (metaAdded) return;
+    metaAdded = true;
+    if (mp4file.moov == null || opts.metaDataTags == null) return;
+
+    // Write the user-provided tags into moov.udta.meta
+    const udtaBox = mp4file.moov.add('udta');
+    const metaBox = udtaBox.add('meta');
+    metaBox.data = createMetaBox(opts.metaDataTags);
+    metaBox.size = metaBox.data.byteLength;
+  };
+
+  if (opts.metaDataTags != null) {
+    avSyncEvtTool.once('VideoReady', addMetadata);
+    avSyncEvtTool.once('AudioReady', addMetadata);
+  }
+
   let vEncoder =
     opts.video != null
       ? encodeVideoTrack(opts.video, mp4file, avSyncEvtTool)
diff --git a/packages/av-cliper/src/mp4-utils/meta-box.ts b/packages/av-cliper/src/mp4-utils/meta-box.ts
new file mode 100644
index 00000000..0fb992c7
--- /dev/null
+++ b/packages/av-cliper/src/mp4-utils/meta-box.ts
@@ -0,0 +1,124 @@
+const createBoxHeader = (type: string, size: number): Uint8Array => {
+  const buffer = new Uint8Array(8);
+  const view = new DataView(buffer.buffer);
+  view.setUint32(0, size); // Write size as a 32-bit unsigned integer
+  for (let i = 0; i < 4; i++) {
+    buffer[4 + i] = type.charCodeAt(i); // Write type as a 4-character string
+  }
+  return buffer;
+};
+
+const createHdlrBox = (): Uint8Array => {
+  const tec = new TextEncoder();
+  const handlerType = tec.encode('mdta');
+  const nameBytes = tec.encode('mp4 handler');
+  // header(8) + version/flags(4) + pre_defined(4) + handler_type(4) + reserved(12) + name + null terminator(1)
+  const size = 8 + 8 + 4 + 12 + nameBytes.byteLength + 1;
+  const buffer = new Uint8Array(size);
+  const view = new DataView(buffer.buffer);
+
+  // Box header
+  buffer.set(createBoxHeader('hdlr', size), 0);
+
+  // Full box header (version and flags)
+  view.setUint32(8, 0);
+
+  buffer.set(handlerType, 16);
+  buffer.set(nameBytes, 32);
+
+  return buffer;
+};
+
+const createKeysBox = (keys: string[]): Uint8Array => {
+  const tec = new TextEncoder();
+  const keyNamespace = tec.encode('mdta');
+  const keyData = keys.map((key) => {
+    const keyBuf = tec.encode(key);
+    // size(4) + namespace(4) + key
+    const size = 4 + 4 + keyBuf.byteLength;
+
+    const entryBuf = new Uint8Array(size);
+    const dv = new DataView(entryBuf.buffer);
+    dv.setUint32(0, size);
+    entryBuf.set(keyNamespace, 4);
+    entryBuf.set(keyBuf, 4 + keyNamespace.byteLength);
+
+    return entryBuf;
+  });
+  const keyDataSize = keyData.reduce((acc, cur) => acc + cur.byteLength, 0);
+
+  const size = 16 + keyDataSize; // header(8) + version/flags(4) + entry count(4)
+  const buffer = new Uint8Array(size);
+  const view = new DataView(buffer.buffer);
+
+  // Box header
+  buffer.set(createBoxHeader('keys', size), 0);
+
+  // Full box header (version and flags)
+  view.setUint32(8, 0);
+  view.setUint32(12, keys.length); // Entry count
+
+  // Keys
+  let offset = 16;
+  for (const keyBuf of keyData) {
+    buffer.set(keyBuf, offset);
+    offset += keyBuf.byteLength;
+  }
+
+  return buffer;
+};
+
+const createIlstBox = (data: Record<string, string>): Uint8Array => {
+  const tec = new TextEncoder();
+  const dataStrBuf = tec.encode('data');
+  const valueData = Object.entries(data).map(([_, value], index) => {
+    const keyId = index + 1; // Key indexes are 1-based
+    const valueBytes = tec.encode(value);
+    // entry size(4) + key id(4) + data box: size(4) + 'data'(4) + type(4) + locale(4) + value
+    const entrySize = 4 + 4 + 4 + 4 + 4 + 4 + valueBytes.byteLength;
+
+    const buffer = new Uint8Array(entrySize);
+    const view = new DataView(buffer.buffer);
+    view.setUint32(0, entrySize);
+    view.setUint32(4, keyId);
+
+    view.setUint32(8, 16 + valueBytes.byteLength);
+    buffer.set(dataStrBuf, 12); // 'data' type
+
+    // Type indicator: 1 = UTF-8 text
+    view.setUint32(16, 1);
+    // Value
+    buffer.set(valueBytes, 24);
+
+    return buffer;
+  });
+
+  const valueDataSize = valueData.reduce((acc, cur) => acc + cur.byteLength, 0);
+  const totalSize = 8 + valueDataSize;
+  const buffer = new Uint8Array(totalSize);
+  buffer.set(createBoxHeader('ilst', totalSize), 0);
+
+  let offset = 8;
+  for (const entry of valueData) {
+    buffer.set(entry, offset);
+    offset += entry.byteLength;
+  }
+
+  return buffer;
+};
+
+export const createMetaBox = (data: Record<string, string>): Uint8Array => {
+  const hdlrBox = createHdlrBox();
+  const keysBox = createKeysBox(Object.keys(data));
+  const ilstBox = createIlstBox(data);
+
+  const size = hdlrBox.length + keysBox.length + ilstBox.length;
+  const buffer = new Uint8Array(size);
+
+  // The 'meta' box header itself is written by mp4box.js; only the payload is assembled here.
+  buffer.set(hdlrBox, 0);
+  buffer.set(keysBox, hdlrBox.length);
+  buffer.set(ilstBox, hdlrBox.length + keysBox.length);
+
+  return buffer;
+};
diff --git a/types/mp4box.d.ts b/types/mp4box.d.ts
index db7365f1..b86e29ac 100644
--- a/types/mp4box.d.ts
+++ b/types/mp4box.d.ts
@@ -118,8 +118,11 @@ declare module '@webav/mp4box.js' {
     hdr_size: number;
     start: number;
     type: string;
+    data?: Uint8Array;
     write: (dataStream: DataStream) => void;
    parse: (dataStream: DataStream) => void;
+    add: (name: string) => BoxParser;
+    addEntry: (value: string, name: string) => BoxParser;
   }
 
   export interface TrakBoxParser extends BoxParser {
@@ -258,6 +261,7 @@ declare module '@webav/mp4box.js' {
     moofs: MOOFBoxParser[];
     moov?: MOOVBoxParser;
 
+    add: (name: string) => BoxParser;
     addTrack: (opts: VideoTrackOpts | AudioTrackOpts) => number;
     addSample: (trackId: number, buf: ArrayBuffer, sample: SampleOpts) => void;
     releaseUsedSamples(id: number, usedCount: number): void;
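
A minimal usage sketch of the new `metaDataTags` option, modeled on the `concat-media.ts` demo in this patch. It is not part of the diff: the `@webav/av-cliper` import specifier, the `width`/`height` options, the input URL, and the `output()` call are assumed from the package's public demos.

```ts
import { Combinator, OffscreenSprite, MP4Clip } from '@webav/av-cliper';

(async () => {
  // Hypothetical input; any fetchable MP4 stream works here.
  const spr = new OffscreenSprite(
    new MP4Clip((await fetch('./video/bunny.mp4')).body!),
  );

  const com = new Combinator({
    width: 1280,
    height: 720,
    // New in this change: key/value pairs written into the output's moov.udta.meta box
    metaDataTags: { hello: 'world' },
  });
  await com.addSprite(spr, { main: true });

  // Collect the muxed MP4 (now carrying the meta tags) into a Blob.
  const blob = await new Response(com.output()).blob();
  console.log('output size:', blob.size);
})();
```

Because `recodemux` registers `addMetadata` on both `VideoReady` and `AudioReady` and guards with `metaAdded`, the tags are written exactly once, whichever track becomes ready first.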
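For inspecting what `createMetaBox` produces, here is a small sketch that walks the returned payload and logs each child box's type and size (`hdlr`, `keys`, `ilst`). The relative import path is a placeholder; adjust it to wherever `meta-box.ts` lives in your checkout.

```ts
import { createMetaBox } from './meta-box';

// Build the payload that recodemux attaches to moov.udta.meta.
const payload = createMetaBox({ hello: 'world', foo: 'bar' });

// Each child box starts with a 4-byte big-endian size followed by a 4-byte type.
const view = new DataView(payload.buffer, payload.byteOffset, payload.byteLength);
const td = new TextDecoder();
let offset = 0;
while (offset + 8 <= payload.byteLength) {
  const size = view.getUint32(offset);
  const type = td.decode(payload.subarray(offset + 4, offset + 8));
  console.log(type, size); // expected order: hdlr, keys, ilst
  if (size < 8) break; // guard against a malformed size field
  offset += size;
}
```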