Configure clang-format to enforce single space between definition blocks #468

Merged (2 commits) on Jan 23, 2025
1 change: 1 addition & 0 deletions .clang-format
@@ -83,6 +83,7 @@ PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SeparateDefinitionBlocks: Always
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
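
SeparateDefinitionBlocks: Always (available since clang-format 14) tells clang-format to keep exactly one blank line between adjacent definition blocks such as functions, classes, structs, and enums, which is what produces the blank-line insertions in the C++ files below. A minimal before/after sketch with made-up functions:

// Before formatting: adjacent definitions touch.
int add(int a, int b) {
  return a + b;
}
int sub(int a, int b) {
  return a - b;
}

// After running clang-format with SeparateDefinitionBlocks: Always,
// a single blank line separates the two definitions:
int add(int a, int b) {
  return a + b;
}

int sub(int a, int b) {
  return a - b;
}
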
4 changes: 4 additions & 0 deletions src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -13,18 +13,22 @@ namespace facebook::torchcodec {
AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {
TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");
}

AutoAVPacket::~AutoAVPacket() {
av_packet_free(&avPacket_);
}

ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)
: avPacket_(shared.avPacket_) {}

ReferenceAVPacket::~ReferenceAVPacket() {
av_packet_unref(avPacket_);
}

AVPacket* ReferenceAVPacket::get() {
return avPacket_;
}

AVPacket* ReferenceAVPacket::operator->() {
return avPacket_;
}
2 changes: 2 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -383,6 +383,7 @@ void VideoDecoder::createFilterGraph(
}

enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE};

ffmpegStatus = av_opt_set_int_list(
filterState.sinkContext,
"pix_fmts",
@@ -676,6 +677,7 @@ int VideoDecoder::getKeyFrameIndexForPts(
}
return getKeyFrameIndexForPtsUsingScannedIndex(streamInfo.keyFrames, pts);
}

/*
Videos have I frames and non-I frames (P and B frames). Non-I frames need data
from the previous I frame to be decoded.
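
For context on the comment above: because non-I frames depend on the preceding I frame, a pts-based seek must land on the key frame at or before the requested timestamp and then decode forward from there. The sketch below shows that lookup over a sorted list of key-frame timestamps; the function name and types are made up for illustration and are not the decoder's actual implementation.

#include <algorithm>
#include <cstdint>
#include <vector>

// Returns the index of the last key frame whose pts is <= targetPts,
// or -1 if targetPts comes before the first key frame.
inline int keyFrameIndexForPts(
    const std::vector<int64_t>& keyFramePts,
    int64_t targetPts) {
  auto it = std::upper_bound(keyFramePts.begin(), keyFramePts.end(), targetPts);
  if (it == keyFramePts.begin()) {
    return -1;
  }
  return static_cast<int>(it - keyFramePts.begin()) - 1;
}
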
19 changes: 19 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.h
@@ -74,6 +74,7 @@ class VideoDecoder {
// Updates the metadata of the video to accurate values obtained by scanning
// the contents of the video file.
void scanFileAndUpdateMetadataAndIndex();

struct StreamMetadata {
// Common (video and audio) fields derived from the AVStream.
int streamIndex;
@@ -103,6 +104,7 @@ class VideoDecoder {
std::optional<int64_t> width;
std::optional<int64_t> height;
};

struct ContainerMetadata {
std::vector<StreamMetadata> streams;
int numAudioStreams = 0;
@@ -117,6 +119,7 @@
// If set, this is the index to the default video stream.
std::optional<int> bestVideoStreamIndex;
};

// Returns the metadata for the container.
ContainerMetadata getContainerMetadata() const;

@@ -130,8 +133,10 @@
// Use the libswscale library for color conversion.
SWSCALE
};

struct VideoStreamDecoderOptions {
VideoStreamDecoderOptions() {}

explicit VideoStreamDecoderOptions(const std::string& optionsString);
// Number of threads we pass to FFMPEG for decoding.
// 0 means FFMPEG will choose the number of threads automatically to fully
@@ -149,7 +154,9 @@
// By default we use CPU for decoding for both C++ and python users.
torch::Device device = torch::kCPU;
};

struct AudioStreamDecoderOptions {};

void addVideoStreamDecoder(
int streamIndex,
const VideoStreamDecoderOptions& options = VideoStreamDecoderOptions());
@@ -164,6 +171,7 @@
// Calling getNextFrameNoDemuxInternal() will return the first frame at
// or after this position.
void setCursorPtsInSeconds(double seconds);

// This structure ensures we always keep the streamIndex and AVFrame together
// Note that AVFrame itself doesn't retain the streamIndex.
struct RawDecodedOutput {
@@ -172,6 +180,7 @@
// The stream index of the decoded frame.
int streamIndex;
};

struct DecodedOutput {
// The actual decoded output as a Tensor.
torch::Tensor frame;
@@ -183,11 +192,13 @@
// The duration of the decoded frame in seconds.
double durationSeconds;
};

class EndOfFileException : public std::runtime_error {
public:
explicit EndOfFileException(const std::string& msg)
: std::runtime_error(msg) {}
};

// Decodes the frame where the current cursor position is. It also advances
// the cursor to the next frame.
DecodedOutput getNextFrameNoDemux();
@@ -207,6 +218,7 @@
int streamIndex,
int64_t frameIndex,
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);

struct BatchDecodedOutput {
torch::Tensor frames;
torch::Tensor ptsSeconds;
@@ -257,6 +269,7 @@
int streamIndex,
double startSeconds,
double stopSeconds);

// --------------------------------------------------------------------------
// DECODER PERFORMANCE STATISTICS API
// --------------------------------------------------------------------------
@@ -271,6 +284,7 @@
int64_t numFramesReceivedByDecoder = 0;
int64_t numFlushes = 0;
};

DecodeStats getDecodeStats() const;
void resetDecodeStats();

@@ -286,11 +300,13 @@
// done during pts -> index conversions.)
int64_t nextPts = INT64_MAX;
};

struct FilterState {
UniqueAVFilterGraph filterGraph;
AVFilterContext* sourceContext = nullptr;
AVFilterContext* sinkContext = nullptr;
};

struct DecodedFrameContext {
int decodedWidth;
int decodedHeight;
@@ -300,6 +316,7 @@
bool operator==(const DecodedFrameContext&);
bool operator!=(const DecodedFrameContext&);
};

// Stores information for each stream.
struct StreamInfo {
int streamIndex = -1;
@@ -323,6 +340,7 @@
DecodedFrameContext prevFrameContext;
UniqueSwsContext swsContext;
};

// Returns the key frame index of the presentation timestamp using FFMPEG's
// index. Note that this index may be truncated for some files.
int getKeyFrameIndexForPtsUsingEncoderIndex(AVStream* stream, int64_t pts)
@@ -480,6 +498,7 @@
struct FrameDims {
int height;
int width;

FrameDims(int h, int w) : height(h), width(w) {}
};

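
Read together, the comments in this header outline the decode flow: query getContainerMetadata() for the best video stream, register it with addVideoStreamDecoder(), optionally seek with setCursorPtsInSeconds(), and pull frames with getNextFrameNoDemux(). The fragment below is a minimal usage sketch, not a complete program; it assumes an already constructed VideoDecoder named decoder (construction is not shown in this diff) and a container whose bestVideoStreamIndex is set.

// Hypothetical fragment using only names visible in the header above.
VideoDecoder::ContainerMetadata metadata = decoder.getContainerMetadata();
int bestStreamIndex = *metadata.bestVideoStreamIndex;  // assumed to have a value

VideoDecoder::VideoStreamDecoderOptions options;  // defaults: CPU device, automatic thread count
decoder.addVideoStreamDecoder(bestStreamIndex, options);

decoder.setCursorPtsInSeconds(5.0);  // the next decode returns the first frame at or after 5 s
VideoDecoder::DecodedOutput out = decoder.getNextFrameNoDemux();
// out.frame holds the decoded frame tensor; out.durationSeconds is its duration in seconds.
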
1 change: 1 addition & 0 deletions src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -279,6 +279,7 @@ OpsBatchDecodedOutput get_frames_in_range(
stream_index, start, stop, step.value_or(1));
return makeOpsBatchDecodedOutput(result);
}

OpsBatchDecodedOutput get_frames_by_pts(
at::Tensor& decoder,
int64_t stream_index,
1 change: 1 addition & 0 deletions test/decoders/VideoDecoderTest.cpp
@@ -57,6 +57,7 @@ class VideoDecoderTest : public testing::TestWithParam<bool> {
filepath, VideoDecoder::SeekMode::approximate);
}
}

std::string content_;
};
