423 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			423 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /**
 | |
|  * mux.js
 | |
|  *
 | |
|  * Copyright (c) Brightcove
 | |
|  * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
 | |
|  *
 | |
|  * Reads in-band CEA-708 captions out of FMP4 segments.
 | |
|  * @see https://en.wikipedia.org/wiki/CEA-708
 | |
|  */
 | |
| 'use strict';
 | |
| 
 | |
| var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
 | |
| 
 | |
| var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
 | |
| 
 | |
| var findBox = require('../mp4/find-box.js');
 | |
| 
 | |
| var parseTfdt = require('../tools/parse-tfdt.js');
 | |
| 
 | |
| var parseTfhd = require('../tools/parse-tfhd.js');
 | |
| 
 | |
| var _require = require('./samples.js'),
 | |
|     getMdatTrafPairs = _require.getMdatTrafPairs,
 | |
|     parseSamples = _require.parseSamples;
 | |
/**
  * Locates the sample that contains a given byte offset of the mdat by
  * walking the samples in order and consuming each sample's `size`.
  * Assumes that `parseSamples` has been called first.
  *
  * @param {Number} offset - The offset into the mdat
  * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
  * @return {?Object} The first sample whose byte range covers `offset`,
  *   or null when the offset lies beyond the combined size of all samples.
  *
  * @see ISO-BMFF-12/2015, Section 8.8.8
 **/
var mapToSample = function mapToSample(offset, samples) {
  // Bytes still to be skipped before reaching the sample that holds `offset`
  var remaining = offset;
  var index = 0;
  var candidate;

  while (index < samples.length) {
    candidate = samples[index];

    if (remaining < candidate.size) {
      return candidate;
    }

    remaining -= candidate.size;
    index += 1;
  }

  return null;
};
 | |
/**
  * Finds SEI nal units contained in a Media Data Box.
  * Assumes that `parseSamples` has been called first.
  *
  * The mdat is walked as a sequence of length-prefixed NAL units: a 4-byte
  * big-endian length followed by that many bytes of NAL unit, the first of
  * which is the NAL header. Only units whose type (low 5 bits of the header)
  * is 6 are collected.
  *
  * @param {Uint8Array} avcStream - The bytes of the mdat
  * @param {Object[]} samples - The samples parsed out by `parseSamples`
  * @param {Number} trackId - The trackId of this video track
  * @return {{logs: Object[], seiNals: Object[]}} result
  *   result.seiNals - the parsed SEI NALUs found. Each entry carries
  *     nalUnitType, size, data, escapedRBSP, trackId, pts and dts, which
  *     should match what is expected by CaptionStream.push
  *   result.logs - `{level, message}` entries for SEI NALUs that were
  *     dropped because no sample timing could be associated with them
  *
  * @see ISO-BMFF-12/2015, Section 8.1.1
  * @see Rec. ITU-T H.264, 7.3.2.3.1
 **/
var findSeiNals = function findSeiNals(avcStream, samples, trackId) {
  var avcView = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength),
      result = {
    logs: [],
    seiNals: []
  },
      seiNal,
      data,
      matchingSample,
      i,
      length,
      lastMatchedSample;

  for (i = 0; i + 4 < avcStream.length; i += length) {
    length = avcView.getUint32(i);
    i += 4; // `i` now points at the NAL header byte

    // An empty NAL unit has no header to inspect; move on to the next length prefix
    if (length <= 0) {
      continue;
    }

    switch (avcStream[i] & 0x1F) {
      case 0x06:
        // `length` includes the header byte at `i`, so the payload ends at
        // i + length. Reading up to i + 1 + length would pull in the first
        // byte of the following NAL unit's length prefix.
        data = avcStream.subarray(i + 1, i + length);
        matchingSample = mapToSample(i, samples);
        seiNal = {
          nalUnitType: 'sei_rbsp',
          size: length,
          data: data,
          escapedRBSP: discardEmulationPreventionBytes(data),
          trackId: trackId
        };

        if (matchingSample) {
          seiNal.pts = matchingSample.pts;
          seiNal.dts = matchingSample.dts;
          lastMatchedSample = matchingSample;
        } else if (lastMatchedSample) {
          // If a matching sample cannot be found, use the last
          // sample's values as they should be as close as possible
          seiNal.pts = lastMatchedSample.pts;
          seiNal.dts = lastMatchedSample.dts;
        } else {
          // No timing information at all: report it and drop this NAL
          result.logs.push({
            level: 'warn',
            message: 'We\'ve encountered a nal unit without data at ' + i + ' for trackId ' + trackId + '. See mux.js#223.'
          });
          break;
        }

        result.seiNals.push(seiNal);
        break;

      default:
        break;
    }
  }

  return result;
};
 | |
/**
  * Collects the SEI nals of one video track from an FMP4 segment.
  *
  * Every mdat/traf pair of the segment is inspected; only pairs whose tfhd
  * trackId equals `videoTrackId` and that contain at least one trun are parsed.
  *
  * @param {Uint8Array} segment - The bytes of a single segment
  * @param {Number} videoTrackId - The trackId of a video track in the segment
  * @return {Object.<Number, {seiNals: Object[], logs: Object[]}>} A mapping of
  *   video trackId to the seiNals found in that track and the logs produced
  *   while looking for them. Tracks without a matching pair have no entry.
 **/
var parseCaptionNals = function parseCaptionNals(segment, videoTrackId) {
  var nalsByTrack = {};

  getMdatTrafPairs(segment).forEach(function (mdatTraf) {
    var fragment = mdatTraf.traf;
    // Exactly 1 tfhd per traf
    var trackHeader = parseTfhd(findBox(fragment, ['tfhd'])[0]);
    var currentTrackId = trackHeader.trackId;
    // Either 0 or 1 tfdt per traf
    var decodeTimeBoxes = findBox(fragment, ['tfdt']);
    var decodeTime = 0;
    var trackRuns;
    var found;
    var bucket;

    if (decodeTimeBoxes.length > 0) {
      decodeTime = parseTfdt(decodeTimeBoxes[0]).baseMediaDecodeTime;
    }

    trackRuns = findBox(fragment, ['trun']);

    // Only parse video data for the chosen video track
    if (videoTrackId !== currentTrackId || !(trackRuns.length > 0)) {
      return;
    }

    found = findSeiNals(mdatTraf.mdat, parseSamples(trackRuns, decodeTime, trackHeader), currentTrackId);
    bucket = nalsByTrack[currentTrackId];

    if (!bucket) {
      bucket = nalsByTrack[currentTrackId] = {
        seiNals: [],
        logs: []
      };
    }

    bucket.seiNals = bucket.seiNals.concat(found.seiNals);
    bucket.logs = bucket.logs.concat(found.logs);
  });

  return nalsByTrack;
};
 | |
/**
  * Extracts the caption-carrying SEI nals of one video track from an fmp4
  * segment and bundles them with the track's timescale.
  * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
  *
  * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
  * @param {Number} trackId - The id of the video track to parse
  * @param {Number} timescale - The timescale for the video track from the init segment
  *
  * @return {?Object} parsedData - null when `trackId` is null, otherwise:
  * @return {Object[]|undefined} parsedData.seiNals - SEI nals found for the track,
  *   undefined when `parseCaptionNals` produced no entry for it
  * @return {Object[]|undefined} parsedData.logs - logs collected for the track,
  *   undefined when `parseCaptionNals` produced no entry for it
  * @return {Number} parsedData.timescale - the `timescale` argument, passed through
 **/
var parseEmbeddedCaptions = function parseEmbeddedCaptions(segment, trackId, timescale) {
  var nalsForTrack;
  var found;

  // the ISO-BMFF spec says that trackId can't be zero, but there's some broken
  // content out there, so only a null trackId counts as "no track"
  if (trackId === null) {
    return null;
  }

  nalsForTrack = parseCaptionNals(segment, trackId)[trackId];
  found = nalsForTrack ? nalsForTrack : {};

  return {
    seiNals: found.seiNals,
    logs: found.logs,
    timescale: timescale
  };
};
 | |
/**
  * Converts SEI NALUs into captions that can be used by video.js
  *
  * A CaptionParser must be initialized with `init` before `parse` does any
  * work. Segments passed to `parse` before a track id and timescale are known
  * are cached and replayed once both become available.
 **/
var CaptionParser = function CaptionParser() {
  var isInitialized = false;
  var captionStream;

  // Stores segments seen before trackId and timescale are set
  var segmentCache;

  // Stores video track ID of the track being parsed
  var trackId;

  // Stores the timescale of the track being parsed
  var timescale;

  // Stores captions parsed so far
  var parsedCaptions;

  // Stores whether we are receiving partial data or not
  var parsingPartial;

  /**
    * A method to indicate whether a CaptionParser has been initialized
    * @returns {Boolean}
   **/
  this.isInitialized = function () {
    return isInitialized;
  };

  /**
    * Initializes the underlying CaptionStream, SEI NAL parsing
    * and management, and caption collection
    *
    * @param {Object} [options]
    * @param {Boolean} [options.isPartial] - when truthy, `flushStream` uses
    *   `partialFlush` on the CaptionStream instead of `flush`
   **/
  this.init = function (options) {
    captionStream = new CaptionStream();
    isInitialized = true;
    parsingPartial = options ? options.isPartial : false;

    // Collect dispatched captions
    captionStream.on('data', function (event) {
      // Convert to seconds in the source's timescale
      event.startTime = event.startPts / timescale;
      event.endTime = event.endPts / timescale;
      parsedCaptions.captions.push(event);
      parsedCaptions.captionStreams[event.stream] = true;
    });
    captionStream.on('log', function (log) {
      parsedCaptions.logs.push(log);
    });
  };

  /**
    * Determines if a new video track will be selected
    * or if the timescale changed
    *
    * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
    * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
    * @return {Boolean} false when either argument is missing or empty,
    *   otherwise whether the first track id or its timescale differs from
    *   the values currently in use
   **/
  this.isNewInit = function (videoTrackIds, timescales) {
    // Without both pieces of init data there is nothing to switch to.
    // Checking up front also keeps the comparison below from dereferencing
    // null/undefined.
    if (!videoTrackIds || !timescales) {
      return false;
    }

    if (videoTrackIds.length === 0 || typeof timescales === 'object' && Object.keys(timescales).length === 0) {
      return false;
    }

    return trackId !== videoTrackIds[0] || timescale !== timescales[trackId];
  };

  /**
    * Parses out SEI captions and interacts with underlying
    * CaptionStream to return dispatched captions
    *
    * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
    * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
    * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
    * @return {?Object} null when the parser is not initialized, when track
    *   ids or timescales are missing, when the segment had to be cached, or
    *   when nothing (neither SEI nals nor logs) was found. Otherwise an object
    *   with `captions` (array), `captionStreams` (map of stream name to true)
    *   and `logs` (array). When SEI nals were found this is the parser's own
    *   accumulating result object, which keeps growing until
    *   `clearParsedCaptions` is called.
    * @see parseEmbeddedCaptions
    * @see m2ts/caption-stream.js
   **/
  this.parse = function (segment, videoTrackIds, timescales) {
    var parsedData;
    var cachedSegment;

    if (!this.isInitialized()) {
      return null;
    } else if (!videoTrackIds || !timescales) {
      // This is not likely to be a video segment
      return null;
    } else if (this.isNewInit(videoTrackIds, timescales)) {
      // Use the first video track only as there is no
      // mechanism to switch to other video tracks
      trackId = videoTrackIds[0];
      timescale = timescales[trackId];
    } else if (trackId === null || !timescale) {
      // If an init segment has not been seen yet, hold onto segment
      // data until we have one.
      // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
      segmentCache.push(segment);
      return null;
    }

    // Now that a timescale and trackId is set, parse cached segments
    while (segmentCache.length > 0) {
      cachedSegment = segmentCache.shift();
      this.parse(cachedSegment, videoTrackIds, timescales);
    }

    parsedData = parseEmbeddedCaptions(segment, trackId, timescale);

    if (parsedData && parsedData.logs) {
      parsedCaptions.logs = parsedCaptions.logs.concat(parsedData.logs);
    }

    if (parsedData === null || !parsedData.seiNals) {
      if (parsedCaptions.logs.length) {
        return {
          logs: parsedCaptions.logs,
          captions: [],
          // Same shape as parsedCaptions.captionStreams: a map, not a list
          captionStreams: {}
        };
      }

      return null;
    }

    this.pushNals(parsedData.seiNals);

    // Force the parsed captions to be dispatched
    this.flushStream();
    return parsedCaptions;
  };

  /**
    * Pushes SEI NALUs onto CaptionStream
    * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
    * Assumes that `parseCaptionNals` has been called first
    * @return {null|undefined} null when the parser is uninitialized or
    *   `nals` is missing or empty, undefined otherwise
    * @see m2ts/caption-stream.js
   **/
  this.pushNals = function (nals) {
    if (!this.isInitialized() || !nals || nals.length === 0) {
      return null;
    }

    nals.forEach(function (nal) {
      captionStream.push(nal);
    });
  };

  /**
    * Flushes underlying CaptionStream to dispatch processed, displayable captions
    * @return {null|undefined} null when the parser is uninitialized, undefined otherwise
    * @see m2ts/caption-stream.js
   **/
  this.flushStream = function () {
    if (!this.isInitialized()) {
      return null;
    }

    if (!parsingPartial) {
      captionStream.flush();
    } else {
      captionStream.partialFlush();
    }
  };

  /**
    * Reset caption buckets for new data
   **/
  this.clearParsedCaptions = function () {
    parsedCaptions.captions = [];
    parsedCaptions.captionStreams = {};
    parsedCaptions.logs = [];
  };

  /**
    * Resets underlying CaptionStream
    * @return {null|undefined} null when the parser is uninitialized, undefined otherwise
    * @see m2ts/caption-stream.js
   **/
  this.resetCaptionStream = function () {
    if (!this.isInitialized()) {
      return null;
    }

    captionStream.reset();
  };

  /**
    * Convenience method to clear all captions flushed from the
    * CaptionStream and still being parsed
    * @see m2ts/caption-stream.js
   **/
  this.clearAllCaptions = function () {
    this.clearParsedCaptions();
    this.resetCaptionStream();
  };

  /**
    * Reset caption parser: drops cached segments, forgets the current
    * track id and timescale, empties the caption buckets and resets the
    * CaptionStream. The initialized state is left untouched.
   **/
  this.reset = function () {
    segmentCache = [];
    trackId = null;
    timescale = null;

    if (!parsedCaptions) {
      parsedCaptions = {
        captions: [],
        // CC1, CC2, CC3, CC4
        captionStreams: {},
        logs: []
      };
    } else {
      this.clearParsedCaptions();
    }

    this.resetCaptionStream();
  };

  this.reset();
};
 | |
| 
 | |
| module.exports = CaptionParser; |