Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add media.ccc specific parsing of recordings #1250

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.services.media_ccc.extractors.data.MediaCCCRecording;
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferenceLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.AudioStream;
Expand All @@ -28,15 +29,18 @@
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.LocaleCompat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

public class MediaCCCStreamExtractor extends StreamExtractor {
private JsonObject data;
Expand Down Expand Up @@ -100,64 +104,55 @@ public List<Image> getUploaderAvatars() {
}

@Override
public List<AudioStream> getAudioStreams() throws ExtractionException {
final JsonArray recordings = data.getArray("recordings");
public List<AudioStream> getAudioStreams() {
final List<MediaCCCRecording.Audio> recordings = getRecordings().stream()
.flatMap(r ->
r instanceof MediaCCCRecording.Audio
? Stream.of((MediaCCCRecording.Audio) r)
: Stream.empty()
)
.collect(Collectors.toList());
final List<AudioStream> audioStreams = new ArrayList<>();
for (int i = 0; i < recordings.size(); i++) {
final JsonObject recording = recordings.getObject(i);
final String mimeType = recording.getString("mime_type");
if (mimeType.startsWith("audio")) {
// First we need to resolve the actual video data from the CDN
final MediaFormat mediaFormat;
if (mimeType.endsWith("opus")) {
mediaFormat = MediaFormat.OPUS;
} else if (mimeType.endsWith("mpeg")) {
mediaFormat = MediaFormat.MP3;
} else if (mimeType.endsWith("ogg")) {
mediaFormat = MediaFormat.OGG;
} else {
mediaFormat = null;
}

final AudioStream.Builder builder = new AudioStream.Builder()
.setId(recording.getString("filename", ID_UNKNOWN))
.setContent(recording.getString("recording_url"), true)
.setMediaFormat(mediaFormat)
.setAverageBitrate(UNKNOWN_BITRATE);

final String language = recording.getString("language");
// If the language contains a - symbol, this means that the stream has an audio
// track with multiple languages, so there is no specific language for this stream
// Don't set the audio language in this case
if (language != null && !language.contains("-")) {
builder.setAudioLocale(LocaleCompat.forLanguageTag(language).orElseThrow(() ->
new ParsingException(
"Cannot convert this language to a locale: " + language)
));
}

// Not checking containsSimilarStream here, since MediaCCC does not provide enough
// information to decide whether two streams are similar. Hence that method would
// always return false, e.g. even for different language variations.
audioStreams.add(builder.build());
for (final MediaCCCRecording.Audio recording : recordings) {
// First we need to resolve the actual video data from the CDN
final MediaFormat mediaFormat;
if (recording.mimeType.endsWith("opus")) {
mediaFormat = MediaFormat.OPUS;
} else if (recording.mimeType.endsWith("mpeg")) {
mediaFormat = MediaFormat.MP3;
} else if (recording.mimeType.endsWith("ogg")) {
mediaFormat = MediaFormat.OGG;
} else {
mediaFormat = null;
}
audioStreams.add(new AudioStream.Builder()
.setId(recording.filename)
.setContent(recording.url, true)
.setMediaFormat(mediaFormat)
.setAverageBitrate(UNKNOWN_BITRATE)
.setAudioLocale(recording.language)
.build());
}
return audioStreams;
}

@Override
public List<VideoStream> getVideoStreams() throws ExtractionException {
final JsonArray recordings = data.getArray("recordings");

final List<MediaCCCRecording.Video> recordings = getRecordings().stream()
.flatMap(r ->
r instanceof MediaCCCRecording.Video
? Stream.of((MediaCCCRecording.Video) r)
: Stream.empty()
)
.collect(Collectors.toList());
final List<VideoStream> videoStreams = new ArrayList<>();
for (int i = 0; i < recordings.size(); i++) {
final JsonObject recording = recordings.getObject(i);
final String mimeType = recording.getString("mime_type");
if (mimeType.startsWith("video")) {
for (final MediaCCCRecording.Video recording : recordings) {
// First we need to resolve the actual video data from the CDN
final MediaFormat mediaFormat;
if (mimeType.endsWith("webm")) {
if (recording.mimeType.endsWith("webm")) {
mediaFormat = MediaFormat.WEBM;
} else if (mimeType.endsWith("mp4")) {
} else if (recording.mimeType.endsWith("mp4")) {
mediaFormat = MediaFormat.MPEG_4;
} else {
mediaFormat = null;
Expand All @@ -167,18 +162,119 @@ public List<VideoStream> getVideoStreams() throws ExtractionException {
// information to decide whether two streams are similar. Hence that method would
// always return false, e.g. even for different language variations.
videoStreams.add(new VideoStream.Builder()
.setId(recording.getString("filename", ID_UNKNOWN))
.setContent(recording.getString("recording_url"), true)
.setId(recording.filename)
.setContent(recording.url, true)
.setIsVideoOnly(false)
.setMediaFormat(mediaFormat)
.setResolution(recording.getInt("height") + "p")
.setResolution(recording.height + "p")
.build());
}
}

return videoStreams;
}

/**
 * Convert every entry of the {@code recordings} JSON array into a typed
 * {@link MediaCCCRecording}.
 *
 * <p>Each entry is classified in this order:
 * <ol>
 *     <li>MIME type starting with {@code video/}: {@link MediaCCCRecording.Video}
 *         (marked as {@code SLIDES} if its folder contains {@code "slides"}, otherwise
 *         {@code MAIN})</li>
 *     <li>MIME type starting with {@code audio/}: {@link MediaCCCRecording.Audio}</li>
 *     <li>MIME type equal to {@code application/x-subrip}:
 *         {@link MediaCCCRecording.Subtitle}</li>
 *     <li>MIME type starting with {@code application/} and a folder containing
 *         {@code "slides"}: {@link MediaCCCRecording.Slides}</li>
 *     <li>anything else: {@link MediaCCCRecording.Unknown}, which keeps the raw JSON</li>
 * </ol>
 *
 * <p>Entries lacking a {@code mime_type} or {@code folder} are treated as if the field were
 * the empty string, and a missing {@code language} yields no locale, so a sparse entry is
 * reported as {@code Unknown} (or without language) instead of aborting the whole parse.
 *
 * @return one element per entry of the {@code recordings} array, in array order
 */
public List<MediaCCCRecording> getRecordings() {
    final JsonArray recordingsArray = data.getArray("recordings");
    final List<MediaCCCRecording> recordings = new ArrayList<>();
    for (int i = 0; i < recordingsArray.size(); i++) {
        final JsonObject recording = recordingsArray.getObject(i);
        // Empty-string defaults keep startsWith()/contains() below from throwing on
        // entries that omit these fields.
        final String mimeType = recording.getString("mime_type", "");
        final String folder = recording.getString("folder", "");
        // May be null; every use below is guarded.
        final String languages = recording.getString("language");
        final String url = recording.getString("recording_url");
        final String filename = recording.getString("filename", ID_UNKNOWN);
        // Locale for the single-language recording kinds; null if absent or unsupported.
        final Locale language = languages == null
                ? null
                : mediaCCCLanguageTagToLocale(languages);

        if (mimeType.startsWith("video/")) {
            final MediaCCCRecording.Video v = new MediaCCCRecording.Video();
            v.filename = filename;
            // they will put the slides videos into the "slides" folder
            v.recordingType = folder.contains("slides")
                    ? MediaCCCRecording.VideoType.SLIDES
                    : MediaCCCRecording.VideoType.MAIN;
            v.mimeType = mimeType;
            // A video may list several languages joined by '-'; keep those we can map.
            final List<Locale> locales = new ArrayList<>();
            if (languages != null) {
                for (final String tag : languages.split("-")) {
                    final Locale locale = mediaCCCLanguageTagToLocale(tag);
                    if (locale != null) {
                        locales.add(locale);
                    }
                }
            }
            v.languages = locales;
            v.url = url;
            v.lengthSeconds = recording.getInt("length");
            v.width = recording.getInt("width");
            v.height = recording.getInt("height");
            recordings.add(v);
        } else if (mimeType.startsWith("audio/")) {
            final MediaCCCRecording.Audio a = new MediaCCCRecording.Audio();
            a.filename = filename;
            a.mimeType = mimeType;
            a.language = language;
            a.url = url;
            a.lengthSeconds = recording.getInt("length");
            recordings.add(a);
        } else if ("application/x-subrip".equals(mimeType)) {
            // Must be equals(): '==' compares references and never matches a parsed string.
            final MediaCCCRecording.Subtitle s = new MediaCCCRecording.Subtitle();
            s.filename = filename;
            s.mimeType = mimeType;
            s.language = language;
            s.url = url;
            recordings.add(s);
        } else if (mimeType.startsWith("application/") && folder.contains("slides")) {
            final MediaCCCRecording.Slides s = new MediaCCCRecording.Slides();
            s.filename = filename;
            s.mimeType = mimeType;
            s.language = language;
            s.url = url;
            recordings.add(s);
        } else {
            final MediaCCCRecording.Unknown u = new MediaCCCRecording.Unknown();
            u.filename = filename;
            u.mimeType = mimeType;
            u.url = url;
            u.rawObject = recording;
            recordings.add(u);
        }
    }

    return recordings;
}

/**
 * Map a media.ccc.de language tag onto a {@link Locale}.
 *
 * <p>Only the three-letter tags {@code deu}, {@code eng}, {@code fra}, {@code ita} and
 * {@code spa} are recognised.
 * If the tag still contains a {@code -}, only the part before the first one is looked at.
 * NOTE(review): the tags look like ISO 639-2 codes; confirm against the media.ccc.de API.
 *
 * @param language language tag
 * @return the matching Locale, or {@code null} if the tag is not one of the recognised ones
 */
private static @Nullable Locale mediaCCCLanguageTagToLocale(@Nonnull String language) {
    final int separator = language.indexOf('-');
    // TODO: would be cool if we could WARN here, but let’s just continue in case there’s
    //  still a separator
    final String tag = separator == -1 ? language : language.substring(0, separator);

    if ("deu".equals(tag)) {
        return Locale.GERMAN;
    }
    if ("eng".equals(tag)) {
        return Locale.ENGLISH;
    }
    if ("fra".equals(tag)) {
        return Locale.FRENCH;
    }
    if ("ita".equals(tag)) {
        return Locale.ITALIAN;
    }
    if ("spa".equals(tag)) {
        return Locale.forLanguageTag("es");
    }
    return null;
}

@Override
public List<VideoStream> getVideoOnlyStreams() {
return Collections.emptyList();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package org.schabi.newpipe.extractor.services.media_ccc.extractors.data;

import com.grack.nanojson.JsonObject;

import java.util.List;
import java.util.Locale;

import javax.annotation.Nullable;


/**
 * One downloadable file attached to a talk/event.
 * Check which of the nested implementations an instance is to reach the actual data.
 */
public interface MediaCCCRecording {

    /** Distinguishes the different kinds of video a talk can come with. */
    enum VideoType {
        /** The main recording of a talk/event. */
        MAIN,
        /**
         * A side-recording of a talk/event showing the slides full-screen.
         * Usually a MAIN recording exists alongside a slide-recording.
         */
        SLIDES
    }

    /**
     * A video file of a talk/event.
     * Such files usually carry one or more audio tracks in different languages.
     */
    class Video implements MediaCCCRecording {
        public String filename;
        public String mimeType;
        public String url;
        /** Whether this is the main recording or the slides recording. */
        public VideoType recordingType;
        /** One entry per audio track of the video. */
        public List<Locale> languages;
        public int lengthSeconds;
        public int width;
        public int height;
    }

    /**
     * An audio-only file of a talk/event.
     * The same audio is usually also contained in the corresponding video files.
     */
    class Audio implements MediaCCCRecording {
        public String filename;
        public String mimeType;
        public String url;
        /** Language of the audio, or {@code null} if none could be determined. */
        public @Nullable Locale language;
        public int lengthSeconds;
    }

    /** A subtitle file of a talk/event. */
    class Subtitle implements MediaCCCRecording {
        public String filename;
        public String mimeType;
        public String url;
        /** Language of the subtitles, or {@code null} if none could be determined. */
        public @Nullable Locale language;
    }

    /** The slides of a talk, usually a PDF file. */
    class Slides implements MediaCCCRecording {
        public String filename;
        public String mimeType;
        public String url;
        /** Language of the slides, or {@code null} if none could be determined. */
        public @Nullable Locale language;
    }

    /** Catch-all for files that fit none of the other categories. */
    class Unknown implements MediaCCCRecording {
        public String filename;
        public String mimeType;
        public String url;
        /** The unparsed JSON entry, kept to make debugging easier. */
        public JsonObject rawObject;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,11 @@ public static String followGoogleRedirectIfNeeded(final String url) {
return url;
}

/**
 * Tell whether a string is missing or contains no characters.
 *
 * @param str the string to inspect, may be {@code null}
 * @return {@code true} if {@code str} is {@code null} or the empty string,
 *         {@code false} otherwise
 */
public static boolean isNullOrEmpty(final String str) {
    if (str == null) {
        return true;
    }
    return str.length() == 0;
}
Expand Down
Loading