Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to latest htmlunit neko #2856

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ext/java/nokogiri/Html4Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public class Html4Document extends XmlDocument
public static IRubyObject
read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args)
{
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[3], args[2]);
ctx.setIOInputSource(context, args[0], args[1]);
return ctx.parse(context, (RubyClass) klass, args[1]);
}
Expand All @@ -150,7 +150,7 @@ public class Html4Document extends XmlDocument
public static IRubyObject
read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args)
{
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[3], args[2]);
ctx.setStringInputSource(context, args[0], args[1]);
return ctx.parse(context, (RubyClass) klass, args[1]);
}
Expand Down
77 changes: 68 additions & 9 deletions ext/java/nokogiri/Html4SaxParserContext.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package nokogiri;

import nokogiri.internals.*;
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
Expand All @@ -8,20 +11,18 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.xerces.parsers.AbstractSAXParser;
import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyString;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.xml.sax.SAXException;

import nokogiri.internals.NokogiriHandler;
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;

/**
* Class for Nokogiri::HTML4::SAX::ParserContext.
Expand All @@ -31,7 +32,7 @@
* @author Yoko Harada <yokolet@gmail.com>
*/
@JRubyClass(name = "Nokogiri::HTML4::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
public class Html4SaxParserContext extends XmlSaxParserContext
public class Html4SaxParserContext extends SaxParserContext<SAXParser>
{
private static final long serialVersionUID = 1L;

Expand All @@ -50,7 +51,7 @@ public class Html4SaxParserContext extends XmlSaxParserContext
}

@Override
protected AbstractSAXParser
protected SAXParser
createParser() throws SAXException
{
SAXParser parser = new SAXParser();
Expand Down Expand Up @@ -279,11 +280,69 @@ static EncodingType get(final int ordinal)
return ctx;
}

@Override
protected void
preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
/**
 * Builds the default parse options for this context from the Ruby constant
 * Nokogiri::XML::ParseOptions::DEFAULT_HTML, converted to an integer via
 * its to_i method.
 *
 * @param context the current thread context, used to reach the runtime
 *                and to dispatch the to_i call
 * @return an Options instance wrapping the constant's integer value
 */
protected Options
defaultParseOptions(ThreadContext context)
{
  // Resolve the Ruby-side ParseOptions class, then read its DEFAULT_HTML constant.
  final RubyClass parseOptionsClass =
    (RubyClass) context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions");
  final IRubyObject defaultHtml = parseOptionsClass.getConstant("DEFAULT_HTML");

  // Ask Ruby for the integer form, then unwrap it to a Java long.
  final IRubyObject asInteger = Helpers.invoke(context, defaultHtml, "to_i");
  return new ParserContext.Options(RubyFixnum.fix2long(asInteger));
}

/**
 * Ruby-visible parse_with: hands the given Ruby parser object to the
 * superclass implementation and returns whatever it returns.
 * NOTE(review): presumably redeclared here only so the method is bound on
 * this class via @JRubyMethod -- confirm against the superclass.
 *
 * @param context    the current thread context
 * @param rubyParser the Ruby parser object passed through to the superclass
 * @return the result of the superclass parse_with call
 */
@JRubyMethod
public IRubyObject
parse_with(ThreadContext context, IRubyObject rubyParser)
{
  final IRubyObject outcome = super.parse_with(context, rubyParser);
  return outcome;
}

/**
 * Ruby-visible replace_entities= writer: records the truthiness of the
 * given value in the replaceEntities flag.
 *
 * @param context the current thread context (unused)
 * @param value   Ruby object whose truthiness becomes the new flag value
 * @return this parser context
 */
@JRubyMethod(name = "replace_entities=")
public IRubyObject
set_replace_entities(ThreadContext context, IRubyObject value)
{
  final boolean enabled = value.isTrue();
  this.replaceEntities = enabled;
  return this;
}

/**
 * Ruby-visible replace_entities reader.
 *
 * @param context the current thread context, used to reach the runtime
 * @return the replaceEntities flag as a Ruby boolean
 */
@JRubyMethod(name = "replace_entities")
public IRubyObject
get_replace_entities(ThreadContext context)
{
  final Ruby runtime = context.runtime;
  return runtime.newBoolean(replaceEntities);
}

/**
 * Ruby-visible recovery= writer: records the truthiness of the given
 * value in the recovery flag.
 *
 * @param context the current thread context (unused)
 * @param value   Ruby object whose truthiness becomes the new flag value
 * @return this parser context
 */
@JRubyMethod(name = "recovery=")
public IRubyObject
set_recovery(ThreadContext context, IRubyObject value)
{
  final boolean enabled = value.isTrue();
  this.recovery = enabled;
  return this;
}

/**
 * Ruby-visible recovery reader.
 *
 * @param context the current thread context, used to reach the runtime
 * @return the recovery flag as a Ruby boolean
 */
@JRubyMethod(name = "recovery")
public IRubyObject
get_recovery(ThreadContext context)
{
  final Ruby runtime = context.runtime;
  return runtime.newBoolean(recovery);
}

/**
 * Ruby-visible column reader: reports the column number supplied by the
 * handler.
 *
 * @param context the current thread context, used to reach the runtime
 * @return the handler's column as a Fixnum, or nil when the handler
 *         returns null
 */
@JRubyMethod(name = "column")
public IRubyObject
column(ThreadContext context)
{
  final Integer col = handler.getColumn();
  final Ruby runtime = context.getRuntime();
  return col == null
         ? runtime.getNil()
         : RubyFixnum.newFixnum(runtime, col.longValue());
}

/**
 * Ruby-visible line reader: reports the line number supplied by the
 * handler.
 *
 * @param context the current thread context, used to reach the runtime
 * @return the handler's line as a Fixnum, or nil when the handler
 *         returns null
 */
@JRubyMethod(name = "line")
public IRubyObject
line(ThreadContext context)
{
  final Integer row = handler.getLine();
  final Ruby runtime = context.getRuntime();
  return row == null
         ? runtime.getNil()
         : RubyFixnum.newFixnum(runtime, row.longValue());
}
}
19 changes: 5 additions & 14 deletions ext/java/nokogiri/Html4SaxPushParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import nokogiri.internals.NokogiriBlockingQueueInputStream;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.ParserContext;
import nokogiri.internals.SaxParserContext;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
Expand Down Expand Up @@ -146,7 +147,7 @@ public class Html4SaxPushParser extends RubyObject
stream = new NokogiriBlockingQueueInputStream();

assert saxParser != null : "saxParser null";
parserTask = new ParserTask(context, saxParser, stream);
parserTask = new ParserTask(context, saxParser, parse(context.runtime, stream), stream);
futureTask = new FutureTask<Html4SaxParserContext>((Callable) parserTask);
executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
@Override
Expand Down Expand Up @@ -192,22 +193,12 @@ public Thread newThread(Runnable r) {
return Html4SaxParserContext.parse_stream(runtime, klazz, stream);
}

static class ParserTask extends XmlSaxPushParser.ParserTask /* <Html4SaxPushParser> */
static class ParserTask extends SaxParserContext.ParserTask<Html4SaxParserContext>
{

private
ParserTask(ThreadContext context, IRubyObject handler, InputStream stream)
{
super(context, handler, parse(context.runtime, stream), stream);
}

@Override
public Html4SaxParserContext
call() throws Exception
ParserTask(ThreadContext context, IRubyObject handler, Html4SaxParserContext parser, InputStream stream)
{
return (Html4SaxParserContext) super.call();
super(context, handler, parser, stream);
}

}

}
4 changes: 2 additions & 2 deletions ext/java/nokogiri/XmlDocument.java
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ private static class DocumentBuilderFactoryHolder
public static IRubyObject
read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args)
{
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[3], args[2]);
ctx.setIOInputSource(context, args[0], args[1]);
return ctx.parse(context, (RubyClass) klass, args[1]);
}
Expand All @@ -361,7 +361,7 @@ private static class DocumentBuilderFactoryHolder
public static IRubyObject
read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args)
{
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[2], args[3]);
XmlDomParserContext ctx = new XmlDomParserContext(context.runtime, args[3], args[2]);
ctx.setStringInputSource(context, args[0], args[1]);
return ctx.parse(context, (RubyClass) klass, args[1]);
}
Expand Down
Loading