Lightning browser with I2P configuration
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

243 lines
6.5 KiB

/*
* Copyright 2011 Peter Karich
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package acr.browser.lightning.reading;
import java.io.*;
import java.net.SocketTimeoutException;
import java.nio.charset.Charset;
import java.util.Locale;
import acr.browser.lightning.Constants;
import android.util.Log;
/**
 * Converts a byte stream (for example an HTTP response body) into a
 * {@link String}, sniffing the character encoding from the beginning of the
 * content when possible.
 * <p>
 * This class is not thread safe. Use one new instance every time, because the
 * encoding chosen by the last conversion is kept in an instance field.
 *
 * @author Peter Karich
 */
public class Converter {

    public final static String UTF8 = "UTF-8";
    public final static String ISO = "ISO-8859-1";
    public final static int K2 = 2048;

    /** A charset name found in the content must be shorter than this many chars. */
    private static final int MAX_ENCODING_LENGTH = 40;

    private int maxBytes = 1000000 / 2;
    private String encoding;
    private String url;

    /**
     * @param urlOnlyHint
     *            URL of the converted document; only used in log messages
     */
    public Converter(String urlOnlyHint) {
        url = urlOnlyHint;
    }

    public Converter() {
    }

    /**
     * Sets the default byte limit used by the {@code streamToString} overloads
     * that do not take an explicit limit.
     *
     * @return this instance, to allow call chaining
     */
    public Converter setMaxBytes(int maxBytes) {
        this.maxBytes = maxBytes;
        return this;
    }

    /**
     * Extracts the charset parameter from a Content-Type header value such as
     * {@code text/html; charset=utf-8}.
     *
     * @param contentType
     *            the header value, may be {@code null}
     * @return the lower-cased charset name with surrounding quotes removed, or
     *         {@link #ISO} when no charset parameter is present
     */
    public static String extractEncoding(String contentType) {
        String charset = "";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                // Locale.ROOT: with the default locale a Turkish device would
                // turn "ISO-8859-1" into "ıso-8859-1" (dotless i).
                String value = part.trim().toLowerCase(Locale.ROOT);
                if (value.startsWith("charset=")) {
                    charset = stripQuotes(value.substring("charset=".length()));
                }
            }
        }
        // http1.1 says ISO-8859-1 is the default charset
        if (charset.length() == 0) {
            charset = ISO;
        }
        return charset;
    }

    /**
     * @return the lower-cased encoding used by the last conversion, or an empty
     *         string if nothing has been converted yet
     */
    public String getEncoding() {
        if (encoding == null) {
            return "";
        }
        return encoding.toLowerCase(Locale.ROOT);
    }

    /**
     * Converts the stream using the instance's byte limit and the encoding
     * remembered from a previous conversion (if any) as the initial hint.
     */
    public String streamToString(InputStream is) {
        return streamToString(is, maxBytes, encoding);
    }

    /**
     * Converts the stream using the instance's byte limit and {@code enc} as
     * the initial encoding hint.
     */
    public String streamToString(InputStream is, String enc) {
        return streamToString(is, maxBytes, enc);
    }

    /**
     * Reads bytes off the stream and returns them decoded as a string. The
     * stream is always closed before this method returns.
     *
     * @param is
     *            the stream to read
     * @param maxBytes
     *            The max bytes that we want to read from the input stream.
     *            This is a soft limit: it is checked between chunks of
     *            {@link #K2} bytes and is not applied to the bytes consumed
     *            for charset detection.
     * @param enc
     *            encoding hint (e.g. from the HTTP header); UTF-8 is assumed
     *            when it is {@code null}, empty or not supported. A charset
     *            declared inside the content overrides the hint.
     * @return the decoded content, or an empty string if reading failed
     */
    public String streamToString(InputStream is, int maxBytes, String enc) {
        encoding = enc;
        // Http 1.1. standard is iso-8859-1 not utf8 :(
        // but we force utf-8 as youtube assumes it ;)
        if (encoding == null || encoding.isEmpty()) {
            encoding = UTF8;
        } else if (!isSupportedCharset(encoding)) {
            // Decoding the head with an unusable hint would throw and skip the
            // in-content detection altogether, so fall back right away.
            Log.d(Constants.TAG, "Using default encoding:" + UTF8 + " unsupported hint:"
                    + encoding + " " + url);
            encoding = UTF8;
        }

        BufferedInputStream in = null;
        try {
            in = new BufferedInputStream(is, K2);
            ByteArrayOutputStream output = new ByteArrayOutputStream();

            // detect encoding with the help of meta tag
            try {
                // detectCharset() itself no longer rewinds the stream; the mark
                // is kept so that overriding implementations may still do so.
                in.mark(K2 * 2);
                String tmpEnc = detectCharset("charset=", output, in, encoding);
                if (tmpEnc != null) {
                    encoding = tmpEnc;
                } else {
                    Log.d(Constants.TAG, "no charset found in first stage");
                    // detect with the help of xml beginning ala
                    // encoding="charset"
                    tmpEnc = detectCharset("encoding=", output, in, encoding);
                    if (tmpEnc != null) {
                        encoding = tmpEnc;
                    } else {
                        Log.d(Constants.TAG, "no charset found in second stage");
                    }
                }

                if (!isSupportedCharset(encoding)) {
                    throw new UnsupportedEncodingException(encoding);
                }
            } catch (UnsupportedEncodingException e) {
                Log.d(Constants.TAG,
                        "Using default encoding:" + UTF8 + " problem:" + e.getMessage()
                                + " encoding:" + encoding + " " + url);
                encoding = UTF8;
            }

            // SocketException: Connection reset
            // IOException: missing CR => problem on server (probably some xml
            // character thing?)
            // IOException: Premature EOF => socket unexpectly closed from
            // server

            // Bytes consumed during detection are already in 'output'.
            int bytesRead = output.size();
            byte[] arr = new byte[K2];
            while (true) {
                if (bytesRead >= maxBytes) {
                    Log.d(Constants.TAG, "Maxbyte of " + maxBytes
                            + " exceeded! Maybe html is now broken but try it nevertheless. Url: "
                            + url);
                    break;
                }

                int n = in.read(arr);
                if (n < 0) {
                    break;
                }
                bytesRead += n;
                output.write(arr, 0, n);
            }

            return output.toString(encoding);
        } catch (SocketTimeoutException e) {
            Log.e(Constants.TAG, e.toString() + " url:" + url);
        } catch (IOException e) {
            Log.e(Constants.TAG, e.toString() + " url:" + url);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (Exception ignored) {
                    // best effort: the content (or the failure) is already
                    // determined, a close error changes nothing
                }
            }
        }
        return "";
    }

    /**
     * Looks for {@code key} (e.g. {@code charset=}) at the beginning of the
     * content and returns the charset name that follows it.
     * <p>
     * If fewer than {@link #K2} bytes have been buffered in {@code bos} so far,
     * more are read from {@code in} until that threshold is reached or the
     * stream ends, so detection also works when a single read only returns a
     * few bytes. Everything read is appended to {@code bos} and stays there;
     * the stream is not rewound, the caller simply continues reading after the
     * buffered bytes.
     *
     * @param key
     *            the text that directly precedes the charset name
     * @param bos
     *            receives the bytes consumed from {@code in}
     * @param in
     *            the stream to read the beginning of the content from
     * @param enc
     *            encoding used to decode the buffered bytes for the search
     * @return the charset name as returned by {@code SHelper.encodingCleanup},
     *         or {@code null} if none was found
     * @throws IOException
     *             if reading fails or {@code enc} is not a supported encoding
     */
    protected String detectCharset(String key, ByteArrayOutputStream bos, BufferedInputStream in,
            String enc) throws IOException {
        // Grab better encoding from stream
        byte[] arr = new byte[K2];
        while (bos.size() < K2) {
            int n = in.read(arr);
            if (n < 0) {
                break;
            }
            bos.write(arr, 0, n);
        }

        String str = bos.toString(enc);
        int keyIndex = str.indexOf(key);
        if (keyIndex < 0) {
            return null;
        }

        int valueStart = keyIndex + key.length();
        if (valueStart >= str.length()) {
            // the key is the very last thing buffered, there is no value
            return null;
        }

        char startChar = str.charAt(valueStart);
        int valueEnd;
        if (startChar == '\'' || startChar == '"') {
            // charset='something' or charset="something": value runs up to
            // the matching quote
            valueStart++;
            valueEnd = str.indexOf(startChar, valueStart);
        } else {
            // "text/html; charset=utf-8" or <meta charset=utf-8>
            valueEnd = findUnquotedValueEnd(str, valueStart);
        }

        // assume that the encoding string cannot be greater than 40 chars
        if (valueEnd <= valueStart || valueEnd >= valueStart + MAX_ENCODING_LENGTH) {
            return null;
        }

        String tmpEnc = SHelper.encodingCleanup(str.substring(valueStart, valueEnd));
        if (tmpEnc == null || tmpEnc.length() == 0) {
            // nothing usable left after cleanup; let the caller try the next key
            return null;
        }
        return tmpEnc;
    }

    /**
     * Returns the index of the first character at or after {@code from} that
     * cannot be part of an unquoted charset name (quote, whitespace,
     * {@code >}, {@code ;} or {@code /}), or -1 if there is none within
     * {@link #MAX_ENCODING_LENGTH} characters.
     */
    private static int findUnquotedValueEnd(String str, int from) {
        int limit = Math.min(str.length(), from + MAX_ENCODING_LENGTH);
        for (int i = from; i < limit; i++) {
            char c = str.charAt(i);
            if (c == '"' || c == '\'' || c == '>' || c == ';' || c == '/'
                    || Character.isWhitespace(c)) {
                return i;
            }
        }
        return -1;
    }

    /** Removes whitespace and quote characters surrounding a header parameter value. */
    private static String stripQuotes(String value) {
        String trimmed = value.trim();
        int start = 0;
        int end = trimmed.length();
        while (start < end && isQuote(trimmed.charAt(start))) {
            start++;
        }
        while (end > start && isQuote(trimmed.charAt(end - 1))) {
            end--;
        }
        return trimmed.substring(start, end).trim();
    }

    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Like {@link Charset#isSupported(String)}, but reports {@code false}
     * instead of throwing when the name is {@code null} or syntactically
     * illegal (as can happen with names taken from page content).
     */
    private static boolean isSupportedCharset(String name) {
        try {
            return name != null && Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // includes IllegalCharsetNameException
            return false;
        }
    }
}