Skip to content

Commit

Permalink
Merge pull request #454 from IDPF/feature/xhtml-extension
Browse files Browse the repository at this point in the history
Check XHTML Content Docs extensions in EPUB 3
  • Loading branch information
rdeltour committed Sep 12, 2014
2 parents a5fd943 + 6fd290d commit 9cd4e87
Show file tree
Hide file tree
Showing 33 changed files with 387 additions and 46 deletions.
26 changes: 15 additions & 11 deletions src/main/java/com/adobe/epubcheck/ctc/EpubHTML5StructureCheck.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
package com.adobe.epubcheck.ctc;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.ctc.epubpackage.EpubPackage;
import com.adobe.epubcheck.ctc.epubpackage.ManifestItem;
Expand All @@ -15,16 +25,6 @@
import com.adobe.epubcheck.util.SearchDictionary;
import com.adobe.epubcheck.util.SearchDictionary.DictionaryType;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class EpubHTML5StructureCheck implements DocumentValidator
{
static final int hasHtml = 1;
Expand Down Expand Up @@ -113,8 +113,12 @@ else if (prop.equals("rendition:layout-reflowable"))
/***VALIDATE FILE EXTENSION***/

String fileExtension = mi.getHref().substring(mi.getHref().lastIndexOf('.') + 1, mi.getHref().length());
if (!(fileExtension.compareToIgnoreCase("html") == 0 || fileExtension.compareToIgnoreCase("htm") == 0 || fileExtension.compareToIgnoreCase("xhtml") == 0))
if (epubPackage.getVersion() == EPUBVersion.VERSION_2
&& !(fileExtension.compareToIgnoreCase("html") == 0
|| fileExtension.compareToIgnoreCase("htm") == 0
|| fileExtension.compareToIgnoreCase("xhtml") == 0))
{
// Note: extension is already checked in OPFChecker30 for EPUB 3
report.message(MessageId.HTM_014, new MessageLocation(mi.getHref(), -1, -1));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ static Map<MessageId, Severity> getDefaultSeverities()
map.put(MessageId.HTM_012, Severity.USAGE);
map.put(MessageId.HTM_013, Severity.USAGE);
map.put(MessageId.HTM_014, Severity.WARNING);
map.put(MessageId.HTM_014a, Severity.WARNING);
map.put(MessageId.HTM_015, Severity.WARNING);
map.put(MessageId.HTM_016, Severity.WARNING);
map.put(MessageId.HTM_017, Severity.ERROR);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ public enum MessageId implements Comparable<MessageId>
HTM_012("HTM-012"),
HTM_013("HTM-013"),
HTM_014("HTM-014"),
HTM_014a("HTM-014a"),
HTM_015("HTM-015"),
HTM_016("HTM-016"),
HTM_017("HTM-017"),
Expand Down
1 change: 0 additions & 1 deletion src/main/java/com/adobe/epubcheck/opf/OPFChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ public void runChecks()
}

report.info(item.getPath(), FeatureEnum.DECLARED_MIMETYPE, item.getMimeType());
checkItem(item, opfHandler);
}

checkGuide();
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/com/adobe/epubcheck/opf/OPFChecker30.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.adobe.epubcheck.util.EPUBVersion;
import com.adobe.epubcheck.util.GenericResourceProvider;
import com.adobe.epubcheck.xml.XMLValidator;
import com.google.common.io.Files;

public class OPFChecker30 extends OPFChecker implements DocumentValidator
{
Expand Down Expand Up @@ -109,7 +110,13 @@ protected void checkItem(OPFItem item, OPFHandler opfHandler)
// "invalid content for media-type attribute");
return;
}


if ("application/xhtml+xml".equals(mimeType) && !"xhtml".equals(Files.getFileExtension(item.getPath())))
{
report.message(MessageId.HTM_014a,
new MessageLocation(path, item.getLineNumber(), item.getColumnNumber()), item.getPath());
}

if (fallback != null)
{
OPFItem fallbackItem = opfHandler.getItemById(fallback);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ HTM_011=Entity is undeclared.
HTM_011_SUG=Define the entity or use the numbered entity instead.
HTM_012=Found a link to a CFI in an external book.
HTM_013=Intra-Publication CFIs found in document.
HTM_014=Invalid file extension for HTML5 file, expecting (html, htm or xhtml).
HTM_014=Invalid file extension for HTML file, expecting (html, htm or xhtml).
HTM_014a=XHTML Content Document file name '%1$s' should have the extension '.xhtml'.
HTM_015=HTML4 DOCTYPE definition within ePub v3.
HTM_016=HTML5 DOCTYPE definition within ePub v2.
HTM_017=Content file has different language value in attributes xml:lang and lang.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,13 @@ public void testValidateEPUB20_issue267() {
List<MessageId> expectedWarnings = new ArrayList<MessageId>();
testValidateDocument("valid/issue267/", expectedErrors, expectedWarnings, "valid/issue267.txt");
}

/**
 * Validates the "invalid/xhtml-extension" fixture, passing an empty
 * expected-error list and an expected-warning list that contains only
 * {@link MessageId#HTM_014}.
 */
@Test
public void testXHTMLExtension()
{
  // The only message expected from this fixture is the HTM_014 warning.
  final List<MessageId> expectedWarnings = new ArrayList<MessageId>();
  expectedWarnings.add(MessageId.HTM_014);

  // No errors are expected.
  final List<MessageId> expectedErrors = new ArrayList<MessageId>();

  testValidateDocument("invalid/xhtml-extension", expectedErrors, expectedWarnings);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ public void testValidateEPUB30_circularFallback()
public void testValidateEPUB30_nonresolvingFallback()
{
List<MessageId> expectedErrors = new ArrayList<MessageId>();
Collections.addAll(expectedErrors, MessageId.RSC_005, MessageId.OPF_040, MessageId.OPF_040, MessageId.MED_003);
Collections.addAll(expectedErrors, MessageId.RSC_005, MessageId.OPF_040, MessageId.MED_003);
List<MessageId> expectedWarnings = new ArrayList<MessageId>();
//dupe messages, tbf
testValidateDocument("invalid/fallbacks-nonresolving/", expectedErrors, expectedWarnings);
Expand Down Expand Up @@ -694,4 +694,14 @@ public void testCollectionPreview() {
testValidateDocument("valid/collections-preview/", expectedErrors, expectedWarnings, expectedFatals,true);
}

/**
 * Validates the "invalid/xhtml-extension" fixture, passing empty
 * expected-error and expected-fatal lists and an expected-warning list that
 * contains only {@link MessageId#HTM_014a}.
 */
// NOTE(review): the method name reads "XHTM"; presumably "XHTML" was meant.
// The name is left unchanged here.
@Test
public void testXHTMExtension()
{
  // The only message expected from this fixture is the HTM_014a warning.
  final List<MessageId> expectedWarnings = new ArrayList<MessageId>();
  expectedWarnings.add(MessageId.HTM_014a);

  // Neither errors nor fatal errors are expected.
  final List<MessageId> expectedErrors = new ArrayList<MessageId>();
  final List<MessageId> expectedFatals = new ArrayList<MessageId>();

  // NOTE(review): the meaning of the trailing 'true' flag is defined by
  // testValidateDocument, which is not visible here; confirm against it.
  testValidateDocument("invalid/xhtml-extension", expectedErrors, expectedWarnings, expectedFatals, true);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* Base styling for the document body: black text in a sans-serif font stack. */
body {
color:black;
font-family: arial, helvetica, sans-serif;
}
100 changes: 100 additions & 0 deletions src/test/resources/20/expanded/invalid/xhtml-extension/EPUB/lorem.foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="la">
<head>
<title>Lorem Ipsum</title>
<link type="text/css" rel="stylesheet" href="lorem.css" />
</head>
<body>
<h1>Lorem Ipsum</h1>
<div id="ch1">
<h2>Chapter 1</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam vel purus mauris, ut
auctor massa. Pellentesque non nunc risus. Fusce a massa augue. Nunc erat ante,
auctor id varius ac, vestibulum non purus. Quisque non dui in sem consectetur
condimentum non ac quam. Quisque ultricies nulla nec urna fringilla pretium.
Pellentesque dictum pulvinar purus in mattis. Aliquam vestibulum orci sed magna
vestibulum a sollicitudin lectus pharetra. Suspendisse luctus risus imperdiet nunc
condimentum malesuada. Nulla fringilla vulputate vestibulum. Sed diam dui, fringilla
quis sagittis nec, viverra et nibh.</p>

<p>Sed sollicitudin accumsan augue, quis pulvinar sem volutpat at. Vestibulum rutrum
bibendum augue sit amet accumsan. Etiam tempus malesuada libero vestibulum
fringilla. Maecenas diam nulla, ultricies ac sodales vitae, viverra ut velit.
Vivamus posuere, mi sit amet vehicula tempus, nibh purus scelerisque enim, non
vestibulum erat arcu in libero. Aliquam vel convallis nibh. Sed in nisi ipsum. Sed
sed est justo, in lacinia nulla.</p>

<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed luctus est vel lacus
ullamcorper vestibulum. Mauris est sapien, pharetra id feugiat in, ornare a erat.
Nam consectetur vehicula nisi vel faucibus. Morbi blandit augue nec lacus malesuada
venenatis. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur
ridiculus mus. Maecenas consectetur, odio vitae suscipit ullamcorper, arcu ligula
pellentesque sem, quis rhoncus enim eros id lectus. Nam ornare dui est, vel posuere
metus. Quisque non nisl metus. Pellentesque id mi nunc, in gravida metus. Nullam
neque tellus, ultricies quis laoreet vitae, imperdiet at nunc. Ut laoreet massa quis
quam vulputate et ultricies nibh consectetur. Donec convallis, nulla id ultricies
ullamcorper, diam tortor interdum dolor, vel tempor lectus urna ut est. Praesent
convallis lacus vitae justo lobortis euismod. In at ante elit.</p>

<p>Aenean quis consectetur justo. Nulla nec enim nisl. Etiam rutrum volutpat tellus, a
scelerisque mauris malesuada sit amet. Suspendisse quis urna augue. Proin tempus
hendrerit libero non cursus. Praesent non massa at nisl luctus facilisis. Nullam
pulvinar, ligula eu porta ornare, mi mi accumsan orci, a iaculis tortor lorem quis
dolor. Phasellus ante nibh, pulvinar ac pulvinar eu, pulvinar ac enim.</p>

<p>Donec vel velit id elit volutpat vestibulum vitae a erat. Duis id est id magna
aliquam pretium nec sit amet nibh. Nullam condimentum suscipit felis, sed interdum
felis dictum ac. Phasellus non nisi quis magna pellentesque auctor. Cras risus
lectus, viverra eu fringilla malesuada, rhoncus et est. Etiam rhoncus pharetra
accumsan. Nullam suscipit tellus felis.</p>
</div>
<div id="ch2">
<h2>Chapter 2</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla laoreet nibh felis.
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia
Curae; Etiam est sapien, dapibus eget gravida nec, accumsan a turpis. Nunc in nisi
ut dolor elementum porttitor. Mauris hendrerit pulvinar tincidunt. Etiam metus
metus, ullamcorper ut varius lacinia, luctus et nibh. Donec ut metus enim, id
faucibus nunc. Quisque ut iaculis mauris. Duis pellentesque nulla ut eros ultricies
quis condimentum eros adipiscing. Sed porta ultrices diam, ut sagittis lectus mattis
a. Phasellus gravida, sapien vitae mollis interdum, dui neque tempor arcu, ac ornare
leo ipsum ut nisl.</p>

<p>Donec porta, odio et aliquet molestie, felis tellus fermentum leo, id interdum magna
massa quis ligula. Integer elementum mauris eget nisl eleifend facilisis nec sit
amet tellus. Morbi consectetur dignissim egestas. Donec pulvinar, enim eu auctor
cursus, turpis arcu venenatis turpis, eu cursus magna nisl sit amet ante. Curabitur
eleifend arcu eget nibh facilisis mattis. Etiam nisl nunc, semper vitae condimentum
sed, viverra sit amet lacus. Curabitur et orci augue. Suspendisse sollicitudin
vulputate risus, sit amet consequat erat mollis eu. Nunc sodales tincidunt
tincidunt.</p>

<p>Aliquam erat volutpat. Aliquam ornare augue et nulla consequat commodo. Quisque
dictum rhoncus orci vel euismod. Proin leo turpis, adipiscing quis facilisis id,
condimentum sed metus. Nullam pellentesque scelerisque est nec tristique. Nunc augue
turpis, consequat non varius quis, aliquam auctor dolor. Cras luctus dignissim justo
sit amet laoreet. Quisque vel ipsum quis massa suscipit vehicula.</p>

<p>Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis
egestas. Vivamus fringilla eleifend magna, vel commodo turpis egestas at.
Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis
egestas. Sed eu lorem quam, et sagittis libero. Maecenas vel ante id sem bibendum
laoreet nec dignissim justo. Class aptent taciti sociosqu ad litora torquent per
conubia nostra, per inceptos himenaeos. Fusce eu lorem orci, eu viverra nisi. Lorem
ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum dapibus commodo
pellentesque. Maecenas quis est accumsan est interdum pharetra egestas nec lorem.
Nam a lectus sit amet justo facilisis suscipit.</p>

<p>Integer dolor dolor, volutpat id commodo id, gravida id risus. Donec consectetur
sollicitudin sem, non auctor urna pulvinar non. Vivamus ipsum nisi, commodo sed
scelerisque id, porta nec massa. Vestibulum ac risus et augue faucibus fermentum ut
et nisi. Integer tincidunt suscipit ipsum, sed interdum felis mollis sed.
Suspendisse potenti. Praesent et mauris et quam consequat tristique. Morbi mi dolor,
pharetra quis rutrum quis, fringilla in tortor. Sed a nulla vitae leo dapibus
cursus. Aliquam erat volutpat. Integer purus purus, dictum id bibendum at, lobortis
quis metus.</p>
</div>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- NCX table of contents with two navPoints; both point at fragments
     (#ch1, #ch2) of lorem.foo, a content file with a non-standard extension. -->
<ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/"
xmlns="http://www.daisy.org/z3986/2005/ncx/"
version="2005-1"
xml:lang="en">
<head>
<meta name="dtb:uid" content="urn:uuid:550e8400-e29b-41d4-a716-4466674412314"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>Lorem Ipsum</text>
</docTitle>
<navMap>
<navPoint id="ch1" playOrder="1">
<navLabel>
<text>Chapter 1</text>
</navLabel>
<content src="lorem.foo#ch1"/>
</navPoint>
<navPoint id="ch2" playOrder="2">
<navLabel>
<text>Chapter 2</text>
</navLabel>
<content src="lorem.foo#ch2"/>
</navPoint>
</navMap>
</ncx>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- OPF 2.0 package document. The manifest declares lorem.foo with media type
     application/xhtml+xml, i.e. an XHTML content document whose file
     extension is none of html, htm or xhtml. It is the only spine item. -->
<package xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="uid">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:identifier id="uid">urn:uuid:550e8400-e29b-41d4-a716-4466674412314</dc:identifier>
<dc:title>Lorem Ipsum</dc:title>
<dc:language>la</dc:language>
<dc:date>2011-09-01</dc:date>
</metadata>
<manifest>
<!-- The item under test: XHTML media type combined with a ".foo" extension. -->
<item id="t1" href="lorem.foo" media-type="application/xhtml+xml" />
<item id="ncx" href="lorem.ncx" media-type="application/x-dtbncx+xml" />
<item id="css" href="lorem.css" media-type="text/css" />
</manifest>
<spine toc="ncx">
<itemref idref="t1" />
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- OCF container file: declares EPUB/lorem.opf as the single rootfile
     (package document) of this publication. -->
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile full-path="EPUB/lorem.opf"
media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
application/epub+zip
Binary file modified src/test/resources/30/epub/invalid/issue265c.epub
Binary file not shown.
Binary file modified src/test/resources/30/epub/valid/issue158.epub
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* Base styling for the document body: black text in a sans-serif font stack. */
body {
color:black;
font-family: arial, helvetica, sans-serif;
}
Loading

0 comments on commit 9cd4e87

Please sign in to comment.