Skip to content

Commit

Permalink
Merge pull request #137 from bilibili/feat/write-meta-data
Browse files Browse the repository at this point in the history
Feat/write meta data [v0.11.0]
  • Loading branch information
hughfenghen authored Jun 14, 2024
2 parents 7694db1 + 5103152 commit 7b8e814
Showing 8 changed files with 162 additions and 3 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -6,9 +6,11 @@ An audio/video toolkit built with pure web technologies, planned to include crea

## Motivation

Chrome 94 released the WebCodecs API, meaning JS can now handle audio/video as well. Before this, frontend developers could only use ffmpeg.wasm in limited scenarios. Through simple tests of decoding and re-encoding videos, it was found WebCodecs has 20x the performance of ffmpeg.wasm.
Chrome 94 released the WebCodecs API, meaning JS can now handle audio/video as well. Before this, frontend developers could only use ffmpeg.wasm in limited scenarios. Through simple tests of decoding and re-encoding videos, it was found WebCodecs has 20x the performance of ffmpeg.wasm.
_WebAV is compatible with Chrome 102 and above due to the use of the new OPFS API._

Chrome 94 开放了 WebCodecs API,意味着 JS 也能处理音视频了。在此之前,前端开发在会在有限的场景使用 ffmpeg.wasm,经过简单地测试解码和重编码视频发现,WebCodecs 性能是 ffmpeg.wasm 的 20 倍。
Chrome 94 开放了 WebCodecs API,意味着 JS 也能处理音视频了。在此之前,前端开发会在有限的场景使用 ffmpeg.wasm,经过简单地测试解码和重编码视频发现,WebCodecs 性能是 ffmpeg.wasm 的 20 倍。
_WebAV 兼容 Chrome102 及以上版本,因为使用 opfs 新 API。_

This is an experimental project attempting to provide easy-to-use APIs for handling audio/video data in the browser. The project is under active development, feel free to open issues to report bugs or suggest new features.

2 changes: 2 additions & 0 deletions packages/av-cliper/demo/concat-media.ts
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ document.querySelector('#mp4-img')?.addEventListener('click', () => {
const spr1 = new OffscreenSprite(
new MP4Clip((await fetch(resList[0])).body!),
);
spr1.time.duration = 3e6;

const spr2 = new OffscreenSprite(
new ImgClip(
@@ -69,6 +70,7 @@ document.querySelector('#mp4-img')?.addEventListener('click', () => {
videoCodec: 'avc1.42E032',
bgColor: 'white',
// audio: false,
metaDataTags: { hello: 'world' },
});

await com.addSprite(spr1, { main: true });
2 changes: 1 addition & 1 deletion packages/av-cliper/demo/decode-media.ts
Original file line number Diff line number Diff line change
@@ -114,7 +114,7 @@ document.querySelector('#decode-audio')?.addEventListener('click', () => {
});

const videos = {
'bunny.mp4': './video/bunny.mp4',
'bunny.mp4': './video/pri-test-metadata.mp4',
'bear.mp4': './video/bear-vp9.mp4',
};
document.querySelector('#decode-video')?.addEventListener('click', () => {
1 change: 1 addition & 0 deletions packages/av-cliper/src/clips/mp4-clip.ts
Original file line number Diff line number Diff line change
@@ -497,6 +497,7 @@ async function parseMP4Stream(
Error('MP4Clip must contain at least one video or audio track'),
);
}
console.log(111111, data.file.moov.udta);
Log.info(
'mp4BoxFile moov ready',
{
5 changes: 5 additions & 0 deletions packages/av-cliper/src/combinator.ts
Original file line number Diff line number Diff line change
@@ -15,6 +15,10 @@ interface ICombinatorOpts {
* false 合成的视频文件中排除音轨
*/
audio?: false;
/**
* 向输出的视频中写入 meta tags 数据
*/
metaDataTags?: Record<string, string>;
}

let COM_ID = 0;
@@ -133,6 +137,7 @@ export class Combinator {
sampleRate: DEFAULT_AUDIO_CONF.sampleRate,
channelCount: DEFAULT_AUDIO_CONF.channelCount,
},
metaDataTags: opts.metaDataTags,
});

TOTAL_COM_ENCODE_QSIZE.set(this, this.#remux.getEecodeQueueSize);
21 changes: 21 additions & 0 deletions packages/av-cliper/src/mp4-utils/index.ts
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ import { EventTool } from '../event-tool';
import { SampleTransform } from './sample-transform';
import { extractFileConfig, unsafeReleaseMP4BoxFile } from './mp4box-utils';
import { tmpfile, write } from 'opfs-tools';
import { createMetaBox } from './meta-box';

type TCleanFn = () => void;

@@ -35,6 +36,7 @@ interface IRecodeMuxOpts {
sampleRate: number;
channelCount: number;
} | null;
metaDataTags?: Record<string, string>;
}

export function recodemux(opts: IRecodeMuxOpts): {
@@ -52,6 +54,25 @@ export function recodemux(opts: IRecodeMuxOpts): {
const avSyncEvtTool = new EventTool<
Record<'VideoReady' | 'AudioReady', () => void>
>();

// Guards against writing the metadata twice: addMetadata is registered on
// both the 'VideoReady' and 'AudioReady' events.
let metaAdded = false;
/**
 * Writes the caller-supplied `opts.metaDataTags` into `moov/udta/meta` of the
 * output file. Runs at most once successfully; it is a no-op when no tags
 * were supplied or while `mp4file.moov` does not exist yet.
 */
const addMetadata = () => {
  const tags = opts.metaDataTags;
  if (metaAdded || tags == null) return;
  // Do not mark as done until the box is actually written, so that a later
  // ready event can retry if moov was not available on the first one.
  if (mp4file.moov == null) return;
  metaAdded = true;

  const udtaBox = mp4file.moov.add('udta');
  const metaBox = udtaBox.add('meta');
  // Use the tags passed in by the caller instead of a hard-coded sample object.
  const payload = createMetaBox(tags);
  metaBox.data = payload;
  metaBox.size = payload.byteLength;
};

if (opts.metaDataTags != null) {
avSyncEvtTool.once('VideoReady', addMetadata);
avSyncEvtTool.once('AudioReady', addMetadata);
}

let vEncoder =
opts.video != null
? encodeVideoTrack(opts.video, mp4file, avSyncEvtTool)
124 changes: 124 additions & 0 deletions packages/av-cliper/src/mp4-utils/meta-box.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/**
 * Builds the 8-byte header of a box: a big-endian 32-bit size followed by the
 * four character codes of `type`.
 *
 * @param type - Box type; its first four characters are written.
 * @param size - Value written into the size field.
 * @returns A new 8-byte array holding the header.
 */
const createBoxHeader = (type: string, size: number): Uint8Array => {
  const header = new Uint8Array(8);
  // DataView writes big-endian by default.
  new DataView(header.buffer).setUint32(0, size);
  [0, 1, 2, 3].forEach((idx) => {
    header[4 + idx] = type.charCodeAt(idx);
  });
  return header;
};

/**
 * Builds a complete 'hdlr' box whose handler type is 'mdta' and whose name is
 * 'mp4 handler'.
 *
 * Byte layout produced here:
 *   0..7   box header (size + 'hdlr')
 *   8..11  version and flags, written as 0
 *   12..15 left zero (presumably the hdlr pre_defined field — TODO confirm)
 *   16..19 handler type 'mdta'
 *   20..31 left zero (presumably reserved — TODO confirm)
 *   32..   name bytes, followed by one zero byte as terminator
 *
 * @returns The serialized box.
 */
const createHdlrBox = (): Uint8Array => {
  const VERSION_FLAGS_OFFSET = 8;
  const HANDLER_TYPE_OFFSET = 16;
  const NAME_OFFSET = 32;

  const encoder = new TextEncoder();
  const name = encoder.encode('mp4 handler');

  // Fixed 32-byte prefix + name + trailing zero byte.
  const boxSize = NAME_OFFSET + name.byteLength + 1;
  const box = new Uint8Array(boxSize);

  box.set(createBoxHeader('hdlr', boxSize), 0);
  new DataView(box.buffer).setUint32(VERSION_FLAGS_OFFSET, 0);
  box.set(encoder.encode('mdta'), HANDLER_TYPE_OFFSET);
  box.set(name, NAME_OFFSET);

  return box;
};

/**
 * Builds a complete 'keys' box listing every metadata key under the 'mdta'
 * namespace.
 *
 * Layout: 8-byte box header, 4 bytes version/flags (0), 4 bytes entry count,
 * then one entry per key made of a 4-byte entry size, the 4-byte namespace
 * 'mdta' and the UTF-8 encoded key.
 *
 * @param keys - Key names, in the order they should be listed.
 * @returns The serialized box.
 */
const createKeysBox = (keys: string[]): Uint8Array => {
  const PREFIX_BYTES = 16; // box header + version/flags + entry count
  const encoder = new TextEncoder();
  const namespace = encoder.encode('mdta');

  const entries: Uint8Array[] = [];
  let entriesBytes = 0;
  for (const key of keys) {
    const keyBytes = encoder.encode(key);
    // entry size (4) + namespace (4) + key bytes
    const entry = new Uint8Array(4 + 4 + keyBytes.byteLength);
    new DataView(entry.buffer).setUint32(0, entry.byteLength);
    entry.set(namespace, 4);
    entry.set(keyBytes, 4 + namespace.byteLength);
    entries.push(entry);
    entriesBytes += entry.byteLength;
  }

  const boxSize = PREFIX_BYTES + entriesBytes;
  const box = new Uint8Array(boxSize);
  const dv = new DataView(box.buffer);

  box.set(createBoxHeader('keys', boxSize), 0);
  dv.setUint32(8, 0); // version and flags
  dv.setUint32(12, keys.length); // entry count

  // Append the entries back to back after the fixed prefix.
  entries.reduce((cursor, entry) => {
    box.set(entry, cursor);
    return cursor + entry.byteLength;
  }, PREFIX_BYTES);

  return box;
};

/**
 * Builds a complete 'ilst' box holding one item per value of `data`.
 *
 * Each item is: 4-byte item size, 4-byte key id (1-based position of the
 * value in `data`), then a nested 'data' record made of a 4-byte size,
 * the tag 'data', a 4-byte field set to 1, 4 zero bytes, and the UTF-8
 * encoded value.
 *
 * @param data - Metadata map; only the values and their order are used here.
 * @returns The serialized box.
 */
const createIlstBox = (data: Record<string, string>): Uint8Array => {
  const ITEM_PREFIX_BYTES = 8; // item size + key id
  const DATA_PREFIX_BYTES = 16; // size + 'data' + 4-byte field (1) + 4 zero bytes
  const encoder = new TextEncoder();
  const dataTag = encoder.encode('data');

  const items = Object.values(data).map((value, idx) => {
    const payload = encoder.encode(value);
    const dataRecordSize = DATA_PREFIX_BYTES + payload.byteLength;
    const item = new Uint8Array(ITEM_PREFIX_BYTES + dataRecordSize);
    const dv = new DataView(item.buffer);

    dv.setUint32(0, item.byteLength);
    dv.setUint32(4, idx + 1); // key ids are 1-based
    dv.setUint32(8, dataRecordSize);
    item.set(dataTag, 12);
    dv.setUint32(16, 1);
    item.set(payload, ITEM_PREFIX_BYTES + DATA_PREFIX_BYTES);

    return item;
  });

  let boxSize = 8; // box header
  for (const item of items) boxSize += item.byteLength;

  const box = new Uint8Array(boxSize);
  box.set(createBoxHeader('ilst', boxSize), 0);

  let cursor = 8;
  items.forEach((item) => {
    box.set(item, cursor);
    cursor += item.byteLength;
  });

  return box;
};

/**
 * Serializes `data` as the body of a 'meta' box: the 'hdlr', 'keys' and 'ilst'
 * boxes concatenated in that order.
 *
 * No 'meta' box header is written here; the returned bytes are the payload
 * only (presumably the owning box object writes its own header — confirm
 * against the caller).
 *
 * @param data - Metadata key/value pairs to embed.
 * @returns The concatenated bytes of the three child boxes.
 */
export const createMetaBox = (data: Record<string, string>): Uint8Array => {
  const children = [
    createHdlrBox(),
    createKeysBox(Object.keys(data)),
    createIlstBox(data),
  ];

  const totalBytes = children.reduce((sum, child) => sum + child.length, 0);
  const payload = new Uint8Array(totalBytes);

  let cursor = 0;
  for (const child of children) {
    payload.set(child, cursor);
    cursor += child.length;
  }

  return payload;
};
4 changes: 4 additions & 0 deletions types/mp4box.d.ts
Original file line number Diff line number Diff line change
@@ -118,8 +118,11 @@ declare module '@webav/mp4box.js' {
hdr_size: number;
start: number;
type: string;
data?: Uint8Array;
write: (dataStream: DataStream) => void;
parse: (dataStream: DataStream) => void;
add: (name: string) => BoxParser;
addEntry: (value: string, name: string) => BoxParser;
}

export interface TrakBoxParser extends BoxParser {
@@ -258,6 +261,7 @@ declare module '@webav/mp4box.js' {
moofs: MOOFBoxParser[];
moov?: MOOVBoxParser;

add: (name: string) => BoxParser;
addTrack: (opts: VideoTrackOpts | AudioTrackOpts) => number;
addSample: (trackId: number, buf: ArrayBuffer, sample: SampleOpts) => void;
releaseUsedSamples(id: number, usedCount: number): void;

0 comments on commit 7b8e814

Please sign in to comment.