head	1.2;
access;
symbols
	RELEASE_9_0_0:1.1
	RELEASE_7_4_0:1.1
	RELEASE_8_2_0:1.1
	RELEASE_6_EOL:1.1
	RELEASE_8_1_0:1.1
	RELEASE_7_3_0:1.1
	RELEASE_8_0_0:1.1
	RELEASE_7_2_0:1.1
	RELEASE_7_1_0:1.1
	RELEASE_6_4_0:1.1;
locks; strict;
comment	@# @;


1.2
date	2012.02.25.18.19.36;	author glewis;	state dead;
branches;
next	1.1;

1.1
date	2008.07.19.17.02.19;	author miwi;	state Exp;
branches;
next	;


desc
@@


1.2
log
@. Update to 3.2.  This also fixes building with openjdk6.

PR:		164813
Approved by:	Maintainer timeout
@
text
@--- src/java/au/id/jericho/lib/html/StreamEncodingDetector.java.orig	2008-06-17 21:01:53.890292905 +0200
+++ src/java/au/id/jericho/lib/html/StreamEncodingDetector.java	2008-06-17 21:02:43.940300330 +0200
@@@@ -203,9 +203,9 @@@@
 		// Assume the more likely case of four 8-bit characters <= U+00FF.
 		// Check whether it fits some common EBCDIC strings that might be found at the start of a document:
 		if (b1==0x4C) { // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
-			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC ("Lo" in Windows-1252)
-			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC ("LZ" in Windows-1252)
-			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" ("L" in Windows-1252), or "<htm" ("L" in Windows-1252)
+			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC 
+			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC 
+			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" or "<htm" 
 			// although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
 		}
 		// Now confident that it is not EBCDIC, but some other 8-bit encoding.
@


1.1
log
@Jericho HTML Parser is a simple but powerful Java library allowing
analysis and manipulation of parts of an HTML document, including
some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML.

It also provides high-level HTML form manipulation functions.

WWW:	http://jerichohtml.sourceforge.net/doc/index.html

PR:		ports/124770
Submitted by:	Marcin Cieslak <saper at SYSTEM.PL>
@
text
@@

