127 lines
3.7 KiB
JavaScript
127 lines
3.7 KiB
JavaScript
const { parseTfdt } = require("../tools/mp4-inspector");
|
|
const findBox = require("./find-box");
|
|
const { getTimescaleFromMediaHeader } = require("./probe");
|
|
const { parseSamples, getMdatTrafPairs } = require("./samples");
|
|
|
|
/**
 * Module for parsing WebVTT text and styles from FMP4 segments.
 * Based on the ISO/IEC 14496-30.
 *
 * Usage: construct a parser, optionally call `init` with the initialization
 * segment to pick up the track timescale, then call `parseSegment` for each
 * content segment.
 */
const WebVttParser = function() {
  // Default timescale of 90k; replaced by `init` when the init segment
  // carries an mdhd box.
  let timescale = 90e3;

  // UTF-8 decoder shared by every cue this parser decodes. It is created on
  // first use instead of once per sample, since it is only ever used for
  // one-shot (non-streaming) decodes.
  let textDecoder;

  /**
   * Decodes the contents of a box as UTF-8 text.
   * @param {Uint8Array|undefined} box The box contents, or undefined when the box is absent.
   * @returns {string|undefined} The decoded text, or undefined when the box
   * is absent or decoding threw (the error is logged, not rethrown).
   */
  const decodeText = function(box) {
    if (!box) {
      return undefined;
    }

    if (!textDecoder) {
      textDecoder = new TextDecoder('utf-8');
    }

    try {
      return textDecoder.decode(box);
    } catch (e) {
      // Best effort: one undecodable box must not abort the whole segment.
      console.error(e);
      return undefined;
    }
  };

  /**
   * Parses the timescale from the init segment.
   * The current timescale is kept when no mdhd box is found.
   * @param {Array<Uint8Array>} segment The initialization segment to parse the timescale from.
   */
  this.init = function(segment) {
    // We just need the timescale from the init segment.
    const mdhd = findBox(segment, ['moov', 'trak', 'mdia', 'mdhd'])[0];

    if (mdhd) {
      timescale = getTimescaleFromMediaHeader(mdhd);
    }
  };

  /**
   * Parses a WebVTT FMP4 segment.
   * @param {Array<Uint8Array>} segment The content segment to parse the WebVTT cues from.
   * @returns {Array<{cueText: string, start: number, end: number, settings: (string|undefined)}>}
   * The WebVTT cue text, styling, and timing info as an array of cue objects.
   * `start` and `end` are the sample's pts and pts + duration divided by the
   * timescale. Samples with a falsy duration and cues with empty or missing
   * text are omitted.
   */
  this.parseSegment = function(segment) {
    const vttCues = [];
    const mdatTrafPairs = getMdatTrafPairs(segment);
    // Declared outside the per-pair loop, so a traf without a tfdt box reuses
    // the value left by the previous pair (initially 0).
    let baseMediaDecodeTime = 0;

    mdatTrafPairs.forEach(function(pair) {
      const mdatBox = pair.mdat;
      const trafBox = pair.traf;
      // zero or one.
      const tfdtBox = findBox(trafBox, ['tfdt'])[0];
      // zero or one.
      const tfhdBox = findBox(trafBox, ['tfhd'])[0];
      // zero or more.
      const trunBoxes = findBox(trafBox, ['trun']);

      if (tfdtBox) {
        baseMediaDecodeTime = parseTfdt(tfdtBox).baseMediaDecodeTime;
      }

      // Without both a tfhd and at least one trun there are no samples to read.
      if (!trunBoxes.length || !tfhdBox) {
        return;
      }

      const samples = parseSamples(trunBoxes, baseMediaDecodeTime, tfhdBox);
      let mdatOffset = 0;

      samples.forEach(function(sample) {
        // Extract sample data from the mdat box.
        // WebVTT Sample format:
        // Exactly one VTTEmptyCueBox box
        // OR one or more VTTCueBox boxes.
        const sampleData = mdatBox.slice(mdatOffset, mdatOffset + sample.size);

        // Advance past this sample right away so that every exit path below
        // leaves the offset pointing at the next sample.
        mdatOffset += sample.size;

        // A vtte box marks an empty cue: nothing to emit for this sample.
        if (findBox(sampleData, ['vtte'])[0]) {
          return;
        }

        // TODO: Support 'vtta' boxes.
        // VTTAdditionalTextBoxes can be interleaved between VTTCueBoxes.
        const vttcBoxes = findBox(sampleData, ['vttc']);

        vttcBoxes.forEach(function(vttcBox) {
          // mandatory payload box, contains the cue text.
          const paylBox = findBox(vttcBox, ['payl'])[0];
          // optional settings box, contains the styling.
          const sttgBox = findBox(vttcBox, ['sttg'])[0];
          const start = sample.pts / timescale;
          const end = (sample.pts + sample.duration) / timescale;
          const cueText = decodeText(paylBox);
          const settings = decodeText(sttgBox);

          if (sample.duration && cueText) {
            vttCues.push({
              cueText,
              start,
              end,
              settings
            });
          }
        });
      });
    });

    return vttCues;
  };
};
|
|
|
|
// Export the parser constructor as this module's CommonJS export.
module.exports = WebVttParser;
|