// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
/*
* ====================================================================
* Copyright 2002-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
package org.htmlparser.tests.parserHelperTests;
import org.htmlparser.Parser;
import org.htmlparser.RemarkNode;
import org.htmlparser.StringNode;
import org.htmlparser.tags.Tag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;
/**
 * Regression tests for the parsing of HTML remark (comment) declarations.
 * Each test feeds a small document to the parser through the helpers
 * inherited from {@link ParserTestCase} and then inspects the resulting
 * <code>node</code> array.
 *
 * Several tests change the static line separator held by {@link Parser}.
 * Tests that switch it to <code>"\n"</code> put it back to
 * <code>"\r\n"</code> in a <code>finally</code> block so that a failing
 * assertion cannot leak the changed setting into later tests.
 */
public class RemarkNodeParserTest extends ParserTestCase
{
    /**
     * Document used by the Serge Kruppa regression tests. It contains a
     * single-line remark as its first node and a multi-line remark further
     * down.
     */
    private static final String REMARK_BUG_HTML =
        "<!-- saved from url=(0022)http://internet.e-mail -->\n"
            + "<HTML>\n"
            + "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"
            + "<META name=\"subject\" content=\"\">\n"
            + "<!--\n"
            + " Whats gonna happen now ?\n"
            + "-->\n"
            + "<TEST>\n"
            + "</TEST>\n";

    /**
     * Creates the named test case.
     *
     * @param name the name handed to the superclass constructor
     */
    public RemarkNodeParserTest(String name)
    {
        super(name);
    }

    /**
     * Parses {@link #REMARK_BUG_HTML} with the line separator set to
     * <code>"\r\n"</code> and checks that eight nodes were produced.
     *
     * @throws ParserException if parsing fails
     */
    private void parseRemarkBugDocument() throws ParserException
    {
        createParser(REMARK_BUG_HTML);
        Parser.setLineSeparator("\r\n");
        parseAndAssertNodeCount(8);
    }

    /**
     * Asserts that the parsed node at the given position is a
     * {@link RemarkNode} and returns it.
     *
     * @param index position in the parsed <code>node</code> array
     * @param message assertion message used when the node has another type
     * @return the node at <code>index</code>, cast to a remark node
     */
    private RemarkNode remarkNodeAt(int index, String message)
    {
        assertTrue(message, node[index] instanceof RemarkNode);
        return (RemarkNode) node[index];
    }

    /**
     * Reproduces a bug reported by Serge Kruppa (2002-Feb-08): the
     * multi-line remark in the following document was not correctly
     * identified as a remark.
     * <pre>
     * &lt;!-- saved from url=(0022)http://internet.e-mail --&gt;
     * &lt;HTML&gt;
     * &lt;HEAD&gt;&lt;META name="title" content="Training Introduction"&gt;
     * &lt;META name="subject" content=""&gt;
     * &lt;!--
     *  Whats gonna happen now ?
     * --&gt;
     * &lt;TEST&gt;
     * &lt;/TEST&gt;
     * </pre>
     *
     * @throws ParserException if parsing fails
     */
    public void testRemarkNodeBug() throws ParserException
    {
        parseRemarkBugDocument();

        RemarkNode firstRemark =
            remarkNodeAt(0, "First node should be a HTMLRemarkNode");
        assertEquals(
            "Text of the remarkNode #1",
            " saved from url=(0022)http://internet.e-mail ",
            firstRemark.getText());

        RemarkNode sixthRemark =
            remarkNodeAt(5, "Sixth node should be a HTMLRemarkNode");
        assertEquals(
            "Text of the remarkNode #6",
            "\r\n Whats gonna happen now ?\r\n",
            sixthRemark.getText());
    }

    /**
     * Checks <code>toPlainTextString()</code> for both remarks of
     * {@link #REMARK_BUG_HTML}.
     *
     * @throws ParserException if parsing fails
     */
    public void testToPlainTextString() throws ParserException
    {
        parseRemarkBugDocument();

        RemarkNode firstRemark =
            remarkNodeAt(0, "First node should be a HTMLRemarkNode");
        assertEquals(
            "Plain Text of the remarkNode #1",
            " saved from url=(0022)http://internet.e-mail ",
            firstRemark.toPlainTextString());

        RemarkNode sixthRemark =
            remarkNodeAt(5, "Sixth node should be a HTMLRemarkNode");
        // This assertion used to call getText(), so the plain-text form of
        // the multi-line remark was never actually verified.
        assertEquals(
            "Plain Text of the remarkNode #6",
            "\r\n Whats gonna happen now ?\r\n",
            sixthRemark.toPlainTextString());
    }

    /**
     * Checks <code>toHtml()</code> for both remarks of
     * {@link #REMARK_BUG_HTML}; the expected output uses the
     * <code>"\r\n"</code> line separator set before parsing.
     *
     * @throws ParserException if parsing fails
     */
    public void testToRawString() throws ParserException
    {
        parseRemarkBugDocument();

        RemarkNode firstRemark =
            remarkNodeAt(0, "First node should be a HTMLRemarkNode");
        assertStringEquals(
            "Raw String of the remarkNode #1",
            "<!-- saved from url=(0022)http://internet.e-mail -->",
            firstRemark.toHtml());

        RemarkNode sixthRemark =
            remarkNodeAt(5, "Sixth node should be a HTMLRemarkNode");
        assertStringEquals(
            "Raw String of the remarkNode #6",
            "<!--\r\n Whats gonna happen now ?\r\n-->",
            sixthRemark.toHtml());
    }

    /**
     * A declaration such as <code>&lt;![endif]&gt;</code> is not a remark:
     * the leading blank must come back as a string node and the
     * declaration itself as a tag.
     *
     * @throws ParserException if parsing fails
     */
    public void testNonRemarkNode() throws ParserException
    {
        createParser(" <![endif]>");
        parseAndAssertNodeCount(2);
        // The first node should be a string node, the second a tag
        assertTrue(
            "First node should be a string node",
            node[0] instanceof StringNode);
        assertTrue("Second node should be a Tag", node[1] instanceof Tag);
        StringNode stringNode = (StringNode) node[0];
        Tag tag = (Tag) node[1];
        assertEquals("Text contents", " ", stringNode.getText());
        assertEquals("Tag Contents", "![endif]", tag.getText());
    }

    /**
     * Simulation of bug report 586756, submitted by John Zook: a remark
     * containing nothing but a blank line broke the parser state.
     *
     * @throws ParserException if parsing fails
     */
    public void testRemarkNodeWithBlankLine() throws ParserException
    {
        createParser("<!--\n" + "\n" + "-->");
        Parser.setLineSeparator("\r\n");
        parseAndAssertNodeCount(1);
        RemarkNode remarkNode =
            remarkNodeAt(0, "Node should be a HTMLRemarkNode");
        assertEquals("Expected contents", "\r\n", remarkNode.getText());
    }

    /**
     * Simulation of a bug report submitted by Claude Duguay: a remark with
     * nothing in it (<code>&lt;!--&gt;</code>) crashed the parser.
     *
     * @throws ParserException if parsing fails
     */
    public void testRemarkNodeWithNothing() throws ParserException
    {
        createParser("<!-->");
        parseAndAssertNodeCount(1);
        RemarkNode remarkNode =
            remarkNodeAt(0, "Node should be a HTMLRemarkNode");
        assertEquals("Expected contents", "", remarkNode.getText());
    }

    /**
     * Reproduction of the bug reported by John Zook [594301]: a tag inside
     * a remark, as in <code>&lt;!-- &lt;A&gt; --&gt;</code>, was not parsed
     * correctly.
     *
     * @throws ParserException if parsing fails
     */
    public void testTagWithinRemarkNode() throws ParserException
    {
        createParser("<!-- \n" + "<A>\n" + "bcd -->");
        Parser.setLineSeparator("\n");
        try
        {
            parseAndAssertNodeCount(1);
            RemarkNode remarkNode =
                remarkNodeAt(0, "Node should be a HTMLRemarkNode");
            assertStringEquals(
                "Expected contents",
                " \n<A>\nbcd ",
                remarkNode.getText());
        }
        finally
        {
            // Put the static separator back, as testInvalidTag does, so the
            // "\n" setting does not leak into tests that run afterwards.
            Parser.setLineSeparator("\r\n");
        }
    }

    /**
     * Bug reported by John Zook [594301]: invalid remark nodes were
     * accepted as remark nodes. The input
     * <pre>
     * &lt;!
     * -
     * -
     * ssd --&gt;
     * </pre>
     * is not supposed to be a remark node; it must be parsed as a tag.
     *
     * @throws ParserException if parsing fails
     */
    public void testInvalidTag() throws ParserException
    {
        createParser("<!\n" + "-\n" + "-\n" + "ssd -->");
        Parser.setLineSeparator("\n");
        try
        {
            parseAndAssertNodeCount(1);
            assertTrue(
                "Node should be a Tag but was " + node[0],
                node[0] instanceof Tag);
            Tag tag = (Tag) node[0];
            assertStringEquals(
                "Expected contents",
                "!\n" + "-\n" + "-\n" + "ssd --",
                tag.getText());
        }
        finally
        {
            // Restore the separator even when an assertion above fails.
            Parser.setLineSeparator("\r\n");
        }
    }

    /**
     * Bug reported by John Zook [594301]: dashes inside a comment were not
     * added to the comment text.
     *
     * @throws ParserException if parsing fails
     */
    public void testDashesInComment() throws ParserException
    {
        createParser("<!-- -- -->");
        parseAndAssertNodeCount(1);
        RemarkNode remarkNode =
            remarkNodeAt(0, "Node should be a HTMLRemarkNode but was " + node[0]);
        assertEquals("Remark Node contents", " -- ", remarkNode.getText());
    }

    // from http://www.w3.org/MarkUp/html-spec/html-spec_3.html
    //Comments
    //
    //To include comments in an HTML document, use a comment declaration.
    //A comment declaration consists of `<!' followed by zero or more comments
    //followed by `>'. Each comment starts with `--' and includes all text up to
    //and including the next occurrence of `--'. In a comment declaration, white
    //space is allowed after each comment, but not before the first comment. The
    //entire comment declaration is ignored. (10)
    //
    //For example:
    //
    //<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
    //<HEAD>
    //<TITLE>HTML Comment Example</TITLE>
    //<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->
    //<!-- another -- -- comment -->
    //<!>
    //</HEAD>
    //<BODY>
    //<p> <!- not a comment, just regular old data characters ->

    /**
     * Test a comment declaration with a comment.
     *
     * @throws ParserException if parsing fails
     */
    public void testSingleComment() throws ParserException
    {
        createParser(
            "<HTML>\n"
                + "<HEAD>\n"
                + "<TITLE>HTML Comment Test</TITLE>\n"
                + "</HEAD>\n"
                + "<BODY>\n"
                + "<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->\n"
                + "</BODY>\n"
                + "</HTML>\n");
        parseAndAssertNodeCount(10);
        RemarkNode remarkNode =
            remarkNodeAt(7, "Node should be a HTMLRemarkNode but was " + node[7]);
        assertEquals(
            "Remark Node contents",
            " Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",
            remarkNode.getText());
    }

    /**
     * Test a comment declaration with two comments.
     *
     * @throws ParserException if parsing fails
     */
    public void testDoubleComment() throws ParserException
    {
        createParser(
            "<HTML>\n"
                + "<HEAD>\n"
                + "<TITLE>HTML Comment Test</TITLE>\n"
                + "</HEAD>\n"
                + "<BODY>\n"
                + "<!-- another -- -- comment -->\n"
                + "</BODY>\n"
                + "</HTML>\n");
        parseAndAssertNodeCount(10);
        RemarkNode remarkNode =
            remarkNodeAt(7, "Node should be a HTMLRemarkNode but was " + node[7]);
        assertEquals(
            "Remark Node contents",
            " another -- -- comment ",
            remarkNode.getText());
    }

    /**
     * Test a comment declaration without any comments.
     *
     * @throws ParserException if parsing fails
     */
    public void testEmptyComment() throws ParserException
    {
        createParser(
            "<HTML>\n"
                + "<HEAD>\n"
                + "<TITLE>HTML Comment Test 'testEmptyComment'</TITLE>\n"
                + "</HEAD>\n"
                + "<BODY>\n"
                + "<!>\n"
                + "</BODY>\n"
                + "</HTML>\n");
        parseAndAssertNodeCount(10);
        RemarkNode remarkNode =
            remarkNodeAt(7, "Node should be a HTMLRemarkNode but was " + node[7]);
        assertEquals("Remark Node contents", "", remarkNode.getText());
    }

    // /**
    // * Test what the specification calls data characters.
    // * Actually, no browser I've tried handles this correctly (as text).
    // * Some handle it as a comment and others handle it as a tag.
    // * So for now we leave this test case out.
    // */
    // public void testNotAComment ()
    // throws
    // HTMLParserException
    // {
    // createParser(
    // "<HTML>\n"
    // + "<HEAD>\n"
    // + "<TITLE>HTML Comment Test 'testNotAComment'</TITLE>\n"
    // + "</HEAD>\n"
    // + "<BODY>\n"
    // + "<!- not a comment, just regular old data characters ->\n"
    // + "</BODY>\n"
    // + "</HTML>\n"
    // );
    // parseAndAssertNodeCount(10);
    // assertTrue("Node should not be a HTMLRemarkNode",!(node[7] instanceof HTMLRemarkNode));
    // assertTrue("Node should be a HTMLStringNode but was "+node[7],node[7].getType()==HTMLStringNode.TYPE);
    // HTMLStringNode stringNode = (HTMLStringNode)node[7];
    // assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText());
    // }
}