R4SAS
6 years ago
46 changed files with 2660 additions and 6427 deletions
File diff suppressed because it is too large
Load Diff
@ -1,246 +0,0 @@
@@ -1,246 +0,0 @@
|
||||
/* |
||||
* Copyright 2011 Peter Karich |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import android.util.Log; |
||||
|
||||
import java.io.BufferedInputStream; |
||||
import java.io.ByteArrayOutputStream; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.charset.Charset; |
||||
import java.util.Locale; |
||||
|
||||
/** |
||||
* This class is not thread safe. Use one new instance every time due to |
||||
* encoding variable. |
||||
* |
||||
* @author Peter Karich |
||||
*/ |
||||
public class Converter { |
||||
|
||||
private static final String TAG = "Converter"; |
||||
|
||||
private final static String UTF8 = "UTF-8"; |
||||
private final static String ISO = "ISO-8859-1"; |
||||
private final static int K2 = 2048; |
||||
private int maxBytes = 1000000 / 2; |
||||
private String encoding; |
||||
private String url; |
||||
|
||||
public Converter(String urlOnlyHint) { |
||||
url = urlOnlyHint; |
||||
} |
||||
|
||||
public Converter() { |
||||
} |
||||
|
||||
public Converter setMaxBytes(int maxBytes) { |
||||
this.maxBytes = maxBytes; |
||||
return this; |
||||
} |
||||
|
||||
public static String extractEncoding(String contentType) { |
||||
String[] values; |
||||
if (contentType != null) |
||||
values = contentType.split(";"); |
||||
else |
||||
values = new String[0]; |
||||
|
||||
String charset = ""; |
||||
|
||||
for (String value : values) { |
||||
value = value.trim().toLowerCase(Locale.getDefault()); |
||||
|
||||
if (value.startsWith("charset=")) |
||||
charset = value.substring("charset=".length()); |
||||
} |
||||
|
||||
// http1.1 says ISO-8859-1 is the default charset
|
||||
if (charset.isEmpty()) |
||||
charset = ISO; |
||||
|
||||
return charset; |
||||
} |
||||
|
||||
public String getEncoding() { |
||||
if (encoding == null) |
||||
return ""; |
||||
return encoding.toLowerCase(Locale.getDefault()); |
||||
} |
||||
|
||||
public String streamToString(InputStream is) { |
||||
return streamToString(is, maxBytes, encoding); |
||||
} |
||||
|
||||
public String streamToString(InputStream is, String enc) { |
||||
return streamToString(is, maxBytes, enc); |
||||
} |
||||
|
||||
/** |
||||
* reads bytes off the string and returns a string |
||||
* |
||||
* @param is input stream to read |
||||
* @param maxBytes |
||||
* The max bytes that we want to read from the input stream |
||||
* @return String |
||||
*/ |
||||
private String streamToString(InputStream is, int maxBytes, String enc) { |
||||
encoding = enc; |
||||
// Http 1.1. standard is iso-8859-1 not utf8 :(
|
||||
// but we force utf-8 as youtube assumes it ;)
|
||||
if (encoding == null || encoding.isEmpty()) |
||||
encoding = UTF8; |
||||
|
||||
BufferedInputStream in = null; |
||||
try { |
||||
in = new BufferedInputStream(is, K2); |
||||
ByteArrayOutputStream output = new ByteArrayOutputStream(); |
||||
|
||||
// detect encoding with the help of meta tag
|
||||
try { |
||||
in.mark(K2 * 2); |
||||
String tmpEnc = detectCharset("charset=", output, in, encoding); |
||||
if (tmpEnc != null) |
||||
encoding = tmpEnc; |
||||
else { |
||||
Log.d(TAG, "no charset found in first stage"); |
||||
// detect with the help of xml beginning ala
|
||||
// encoding="charset"
|
||||
tmpEnc = detectCharset("encoding=", output, in, encoding); |
||||
if (tmpEnc != null) |
||||
encoding = tmpEnc; |
||||
else |
||||
Log.d(TAG, "no charset found in second stage"); |
||||
} |
||||
|
||||
if (!Charset.isSupported(encoding)) |
||||
throw new UnsupportedEncodingException(encoding); |
||||
} catch (UnsupportedEncodingException e) { |
||||
Log.d(TAG, |
||||
"Using default encoding:" + UTF8 + " problem:" + e.getMessage() |
||||
+ " encoding:" + encoding + ' ' + url); |
||||
encoding = UTF8; |
||||
} |
||||
|
||||
// SocketException: Connection reset
|
||||
// IOException: missing CR => problem on server (probably some xml
|
||||
// character thing?)
|
||||
// IOException: Premature EOF => socket unexpectly closed from
|
||||
// server
|
||||
int bytesRead = output.size(); |
||||
byte[] arr = new byte[K2]; |
||||
while (true) { |
||||
if (bytesRead >= maxBytes) { |
||||
Log.d(TAG, "Maxbyte of " + maxBytes |
||||
+ " exceeded! Maybe html is now broken but try it nevertheless. Url: " |
||||
+ url); |
||||
break; |
||||
} |
||||
|
||||
int n = in.read(arr); |
||||
if (n < 0) |
||||
break; |
||||
bytesRead += n; |
||||
output.write(arr, 0, n); |
||||
} |
||||
|
||||
return output.toString(encoding); |
||||
} catch (IOException e) { |
||||
Log.e(TAG, e.toString() + " url:" + url); |
||||
} finally { |
||||
if (in != null) { |
||||
try { |
||||
in.close(); |
||||
} catch (Exception e) { |
||||
e.printStackTrace(); |
||||
} |
||||
} |
||||
} |
||||
return ""; |
||||
} |
||||
|
||||
/** |
||||
* This method detects the charset even if the first call only returns some |
||||
* bytes. It will read until 4K bytes are reached and then try to determine |
||||
* the encoding |
||||
* |
||||
* @throws IOException |
||||
*/ |
||||
private static String detectCharset(String key, ByteArrayOutputStream bos, BufferedInputStream in, |
||||
String enc) throws IOException { |
||||
|
||||
// Grab better encoding from stream
|
||||
byte[] arr = new byte[K2]; |
||||
int nSum = 0; |
||||
while (nSum < K2) { |
||||
int n = in.read(arr); |
||||
if (n < 0) |
||||
break; |
||||
|
||||
nSum += n; |
||||
bos.write(arr, 0, n); |
||||
} |
||||
|
||||
String str = bos.toString(enc); |
||||
int encIndex = str.indexOf(key); |
||||
int clength = key.length(); |
||||
if (encIndex > 0) { |
||||
char startChar = str.charAt(encIndex + clength); |
||||
int lastEncIndex; |
||||
if (startChar == '\'') |
||||
// if we have charset='something'
|
||||
lastEncIndex = str.indexOf('\'', ++encIndex + clength); |
||||
else if (startChar == '\"') |
||||
// if we have charset="something"
|
||||
lastEncIndex = str.indexOf('\"', ++encIndex + clength); |
||||
else { |
||||
// if we have "text/html; charset=utf-8"
|
||||
int first = str.indexOf('\"', encIndex + clength); |
||||
if (first < 0) |
||||
first = Integer.MAX_VALUE; |
||||
|
||||
// or "text/html; charset=utf-8 "
|
||||
int sec = str.indexOf(' ', encIndex + clength); |
||||
if (sec < 0) |
||||
sec = Integer.MAX_VALUE; |
||||
lastEncIndex = Math.min(first, sec); |
||||
|
||||
// or "text/html; charset=utf-8 '
|
||||
int third = str.indexOf('\'', encIndex + clength); |
||||
if (third > 0) |
||||
lastEncIndex = Math.min(lastEncIndex, third); |
||||
} |
||||
|
||||
// re-read byte array with different encoding
|
||||
// assume that the encoding string cannot be greater than 40 chars
|
||||
if (lastEncIndex > encIndex + clength && lastEncIndex < encIndex + clength + 40) { |
||||
String tmpEnc = SHelper.encodingCleanup(str.substring(encIndex + clength, |
||||
lastEncIndex)); |
||||
try { |
||||
in.reset(); |
||||
bos.reset(); |
||||
return tmpEnc; |
||||
} catch (IOException ex) { |
||||
Log.e(TAG, "Couldn't reset stream to re-read with new encoding " |
||||
+ tmpEnc + ' ' + ex.toString()); |
||||
} |
||||
} |
||||
} |
||||
return null; |
||||
} |
||||
} |
@ -1,483 +0,0 @@
@@ -1,483 +0,0 @@
|
||||
/* |
||||
* Copyright 2011 Peter Karich |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import java.io.BufferedReader; |
||||
import java.io.BufferedWriter; |
||||
import java.io.FileReader; |
||||
import java.io.FileWriter; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.net.HttpURLConnection; |
||||
import java.net.MalformedURLException; |
||||
import java.net.Proxy; |
||||
import java.net.URL; |
||||
import java.util.LinkedHashSet; |
||||
import java.util.Set; |
||||
import java.util.concurrent.atomic.AtomicInteger; |
||||
import java.util.regex.Pattern; |
||||
import java.util.zip.GZIPInputStream; |
||||
import java.util.zip.Inflater; |
||||
import java.util.zip.InflaterInputStream; |
||||
|
||||
import org.purplei2p.lightning.utils.Utils; |
||||
|
||||
/** |
||||
* Class to fetch articles. This class is thread safe. |
||||
* |
||||
* @author Peter Karich |
||||
*/ |
||||
public class HtmlFetcher { |
||||
|
||||
private static final Pattern SPACE = Pattern.compile(" "); |
||||
|
||||
static { |
||||
SHelper.enableCookieMgmt(); |
||||
SHelper.enableUserAgentOverwrite(); |
||||
SHelper.enableAnySSL(); |
||||
} |
||||
|
||||
public static void main(String[] args) throws Exception { |
||||
BufferedReader reader = null; |
||||
BufferedWriter writer = null; |
||||
try { |
||||
|
||||
//noinspection IOResourceOpenedButNotSafelyClosed
|
||||
reader = new BufferedReader(new FileReader("urls.txt")); |
||||
String line; |
||||
Set<String> existing = new LinkedHashSet<>(); |
||||
while ((line = reader.readLine()) != null) { |
||||
int index1 = line.indexOf('\"'); |
||||
int index2 = line.indexOf('\"', index1 + 1); |
||||
String url = line.substring(index1 + 1, index2); |
||||
String domainStr = SHelper.extractDomain(url, true); |
||||
String counterStr = ""; |
||||
// TODO more similarities
|
||||
if (existing.contains(domainStr)) |
||||
counterStr = "2"; |
||||
else |
||||
existing.add(domainStr); |
||||
|
||||
String html = new HtmlFetcher().fetchAsString(url, 2000); |
||||
String outFile = domainStr + counterStr + ".html"; |
||||
//noinspection IOResourceOpenedButNotSafelyClosed
|
||||
writer = new BufferedWriter(new FileWriter(outFile)); |
||||
writer.write(html); |
||||
} |
||||
} finally { |
||||
Utils.close(reader); |
||||
Utils.close(writer); |
||||
} |
||||
} |
||||
|
||||
private String referrer = "http://jetsli.de/crawler"; |
||||
private String userAgent = "Mozilla/5.0 (compatible; Jetslide; +" + referrer + ')'; |
||||
private String cacheControl = "max-age=0"; |
||||
private String language = "en-us"; |
||||
private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"; |
||||
private String charset = "UTF-8"; |
||||
private SCache cache; |
||||
private final AtomicInteger cacheCounter = new AtomicInteger(0); |
||||
private int maxTextLength = -1; |
||||
private ArticleTextExtractor extractor = new ArticleTextExtractor(); |
||||
private Set<String> furtherResolveNecessary = new LinkedHashSet<String>() { |
||||
{ |
||||
add("bit.ly"); |
||||
add("cli.gs"); |
||||
add("deck.ly"); |
||||
add("fb.me"); |
||||
add("feedproxy.google.com"); |
||||
add("flic.kr"); |
||||
add("fur.ly"); |
||||
add("goo.gl"); |
||||
add("is.gd"); |
||||
add("ink.co"); |
||||
add("j.mp"); |
||||
add("lnkd.in"); |
||||
add("on.fb.me"); |
||||
add("ow.ly"); |
||||
add("plurl.us"); |
||||
add("sns.mx"); |
||||
add("snurl.com"); |
||||
add("su.pr"); |
||||
add("t.co"); |
||||
add("tcrn.ch"); |
||||
add("tl.gd"); |
||||
add("tiny.cc"); |
||||
add("tinyurl.com"); |
||||
add("tmi.me"); |
||||
add("tr.im"); |
||||
add("twurl.nl"); |
||||
} |
||||
}; |
||||
|
||||
public HtmlFetcher() { |
||||
} |
||||
|
||||
public void setExtractor(ArticleTextExtractor extractor) { |
||||
this.extractor = extractor; |
||||
} |
||||
|
||||
public ArticleTextExtractor getExtractor() { |
||||
return extractor; |
||||
} |
||||
|
||||
public HtmlFetcher setCache(SCache cache) { |
||||
this.cache = cache; |
||||
return this; |
||||
} |
||||
|
||||
public SCache getCache() { |
||||
return cache; |
||||
} |
||||
|
||||
public int getCacheCounter() { |
||||
return cacheCounter.get(); |
||||
} |
||||
|
||||
public HtmlFetcher clearCacheCounter() { |
||||
cacheCounter.set(0); |
||||
return this; |
||||
} |
||||
|
||||
public HtmlFetcher setMaxTextLength(int maxTextLength) { |
||||
this.maxTextLength = maxTextLength; |
||||
return this; |
||||
} |
||||
|
||||
public int getMaxTextLength() { |
||||
return maxTextLength; |
||||
} |
||||
|
||||
public void setAccept(String accept) { |
||||
this.accept = accept; |
||||
} |
||||
|
||||
public void setCharset(String charset) { |
||||
this.charset = charset; |
||||
} |
||||
|
||||
public void setCacheControl(String cacheControl) { |
||||
this.cacheControl = cacheControl; |
||||
} |
||||
|
||||
public String getLanguage() { |
||||
return language; |
||||
} |
||||
|
||||
public void setLanguage(String language) { |
||||
this.language = language; |
||||
} |
||||
|
||||
public String getReferrer() { |
||||
return referrer; |
||||
} |
||||
|
||||
public HtmlFetcher setReferrer(String referrer) { |
||||
this.referrer = referrer; |
||||
return this; |
||||
} |
||||
|
||||
public String getUserAgent() { |
||||
return userAgent; |
||||
} |
||||
|
||||
public void setUserAgent(String userAgent) { |
||||
this.userAgent = userAgent; |
||||
} |
||||
|
||||
public String getAccept() { |
||||
return accept; |
||||
} |
||||
|
||||
public String getCacheControl() { |
||||
return cacheControl; |
||||
} |
||||
|
||||
public String getCharset() { |
||||
return charset; |
||||
} |
||||
|
||||
public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception { |
||||
return fetchAndExtract(url, timeout, resolve, 0, false); |
||||
} |
||||
|
||||
// main workhorse to call externally
|
||||
@SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") |
||||
private JResult fetchAndExtract(String url, int timeout, boolean resolve, |
||||
int maxContentSize, boolean forceReload) throws Exception { |
||||
String originalUrl = url; |
||||
url = SHelper.removeHashbang(url); |
||||
String gUrl = SHelper.getUrlFromUglyGoogleRedirect(url); |
||||
if (gUrl != null) |
||||
url = gUrl; |
||||
else { |
||||
gUrl = SHelper.getUrlFromUglyFacebookRedirect(url); |
||||
if (gUrl != null) |
||||
url = gUrl; |
||||
} |
||||
|
||||
if (resolve) { |
||||
// check if we can avoid resolving the URL (which hits the website!)
|
||||
JResult res = getFromCache(url, originalUrl); |
||||
if (res != null) |
||||
return res; |
||||
|
||||
String resUrl = getResolvedUrl(url, timeout, 0); |
||||
if (resUrl.isEmpty()) { |
||||
|
||||
JResult result = new JResult(); |
||||
if (cache != null) |
||||
cache.put(url, result); |
||||
return result.setUrl(url); |
||||
} |
||||
|
||||
// if resolved url is different then use it!
|
||||
if (!resUrl.equals(url)) { |
||||
// this is necessary e.g. for some homebaken url resolvers which return
|
||||
// the resolved url relative to url!
|
||||
url = SHelper.useDomainOfFirstArg4Second(url, resUrl); |
||||
} |
||||
} |
||||
|
||||
// check if we have the (resolved) URL in cache
|
||||
JResult res = getFromCache(url, originalUrl); |
||||
if (res != null) |
||||
return res; |
||||
|
||||
JResult result = new JResult(); |
||||
// or should we use? <link rel="canonical" href="http://www.N24.de/news/newsitem_6797232.html"/>
|
||||
result.setUrl(url); |
||||
result.setOriginalUrl(originalUrl); |
||||
|
||||
// Immediately put the url into the cache as extracting content takes time.
|
||||
if (cache != null) { |
||||
cache.put(originalUrl, result); |
||||
cache.put(url, result); |
||||
} |
||||
|
||||
// extract content to the extent appropriate for content type
|
||||
String lowerUrl = url.toLowerCase(); |
||||
if (SHelper.isDoc(lowerUrl) || SHelper.isApp(lowerUrl) || SHelper.isPackage(lowerUrl)) { |
||||
// skip
|
||||
} else if (SHelper.isVideo(lowerUrl) || SHelper.isAudio(lowerUrl)) { |
||||
result.setVideoUrl(url); |
||||
} else if (SHelper.isImage(lowerUrl)) { |
||||
result.setImageUrl(url); |
||||
} else { |
||||
try { |
||||
String urlToDownload = url; |
||||
if (forceReload) { |
||||
urlToDownload = getURLtoBreakCache(url); |
||||
} |
||||
extractor.extractContent(result, fetchAsString(urlToDownload, timeout), maxContentSize); |
||||
} catch (IOException io) { |
||||
// do nothing
|
||||
} |
||||
if (result.getFaviconUrl().isEmpty()) |
||||
result.setFaviconUrl(SHelper.getDefaultFavicon(url)); |
||||
|
||||
// some links are relative to root and do not include the domain of the url :(
|
||||
if (!result.getFaviconUrl().isEmpty()) |
||||
result.setFaviconUrl(fixUrl(url, result.getFaviconUrl())); |
||||
|
||||
if (!result.getImageUrl().isEmpty()) |
||||
result.setImageUrl(fixUrl(url, result.getImageUrl())); |
||||
|
||||
if (!result.getVideoUrl().isEmpty()) |
||||
result.setVideoUrl(fixUrl(url, result.getVideoUrl())); |
||||
|
||||
if (!result.getRssUrl().isEmpty()) |
||||
result.setRssUrl(fixUrl(url, result.getRssUrl())); |
||||
} |
||||
result.setText(lessText(result.getText())); |
||||
synchronized (result) { |
||||
result.notifyAll(); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
// Ugly hack to break free from any cached versions, a few URLs required this.
|
||||
private static String getURLtoBreakCache(String url) { |
||||
try { |
||||
URL aURL = new URL(url); |
||||
if (aURL.getQuery() != null && aURL.getQuery().isEmpty()) { |
||||
return url + "?1"; |
||||
} else { |
||||
return url + "&1"; |
||||
} |
||||
} catch (MalformedURLException e) { |
||||
return url; |
||||
} |
||||
} |
||||
|
||||
private String lessText(String text) { |
||||
if (text == null) |
||||
return ""; |
||||
|
||||
if (maxTextLength >= 0 && text.length() > maxTextLength) |
||||
return text.substring(0, maxTextLength); |
||||
|
||||
return text; |
||||
} |
||||
|
||||
private static String fixUrl(String url, String urlOrPath) { |
||||
return SHelper.useDomainOfFirstArg4Second(url, urlOrPath); |
||||
} |
||||
|
||||
private String fetchAsString(String urlAsString, int timeout) |
||||
throws IOException { |
||||
return fetchAsString(urlAsString, timeout, true); |
||||
} |
||||
|
||||
// main routine to get raw webpage content
|
||||
private String fetchAsString(String urlAsString, int timeout, boolean includeSomeGooseOptions) |
||||
throws IOException { |
||||
HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, includeSomeGooseOptions); |
||||
hConn.setInstanceFollowRedirects(true); |
||||
String encoding = hConn.getContentEncoding(); |
||||
InputStream is; |
||||
if ("gzip".equalsIgnoreCase(encoding)) { |
||||
is = new GZIPInputStream(hConn.getInputStream()); |
||||
} else if ("deflate".equalsIgnoreCase(encoding)) { |
||||
is = new InflaterInputStream(hConn.getInputStream(), new Inflater(true)); |
||||
} else { |
||||
is = hConn.getInputStream(); |
||||
} |
||||
|
||||
String enc = Converter.extractEncoding(hConn.getContentType()); |
||||
return createConverter(urlAsString).streamToString(is, enc); |
||||
} |
||||
|
||||
private static Converter createConverter(String url) { |
||||
return new Converter(url); |
||||
} |
||||
|
||||
/** |
||||
* On some devices we have to hack: |
||||
* http://developers.sun.com/mobility/reference/techart/design_guidelines/http_redirection.html
|
||||
* |
||||
* @param timeout Sets a specified timeout value, in milliseconds |
||||
* @return the resolved url if any. Or null if it couldn't resolve the url |
||||
* (within the specified time) or the same url if response code is OK |
||||
*/ |
||||
private String getResolvedUrl(String urlAsString, int timeout, |
||||
int num_redirects) { |
||||
String newUrl; |
||||
int responseCode; |
||||
try { |
||||
HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, true); |
||||
// force no follow
|
||||
hConn.setInstanceFollowRedirects(false); |
||||
// the program doesn't care what the content actually is !!
|
||||
// http://java.sun.com/developer/JDCTechTips/2003/tt0422.html
|
||||
hConn.setRequestMethod("HEAD"); |
||||
hConn.connect(); |
||||
responseCode = hConn.getResponseCode(); |
||||
hConn.getInputStream().close(); |
||||
if (responseCode == HttpURLConnection.HTTP_OK) |
||||
return urlAsString; |
||||
|
||||
newUrl = hConn.getHeaderField("Location"); |
||||
// Note that the max recursion level is 5.
|
||||
if (responseCode / 100 == 3 && newUrl != null && num_redirects < 5) { |
||||
newUrl = SPACE.matcher(newUrl).replaceAll("+"); |
||||
// some services use (none-standard) utf8 in their location header
|
||||
if (urlAsString.contains("://bit.ly") |
||||
|| urlAsString.contains("://is.gd")) |
||||
newUrl = encodeUriFromHeader(newUrl); |
||||
|
||||
// AP: This code is not longer need, instead we always follow
|
||||
// multiple redirects.
|
||||
//
|
||||
// fix problems if shortened twice. as it is often the case after twitters' t.co bullshit
|
||||
//if (furtherResolveNecessary.contains(SHelper.extractDomain(newUrl, true)))
|
||||
// newUrl = getResolvedUrl(newUrl, timeout);
|
||||
|
||||
// Add support for URLs with multiple levels of redirection,
|
||||
// call getResolvedUrl until there is no more redirects or a
|
||||
// max number of redirects is reached.
|
||||
newUrl = SHelper.useDomainOfFirstArg4Second(urlAsString, newUrl); |
||||
newUrl = getResolvedUrl(newUrl, timeout, num_redirects + 1); |
||||
return newUrl; |
||||
} else |
||||
return urlAsString; |
||||
|
||||
} catch (Exception ex) { |
||||
return ""; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Takes a URI that was decoded as ISO-8859-1 and applies percent-encoding |
||||
* to non-ASCII characters. Workaround for broken origin servers that send |
||||
* UTF-8 in the Location: header. |
||||
*/ |
||||
private static String encodeUriFromHeader(String badLocation) { |
||||
StringBuilder sb = new StringBuilder(badLocation.length()); |
||||
|
||||
for (char ch : badLocation.toCharArray()) { |
||||
if (ch < (char) 128) { |
||||
sb.append(ch); |
||||
} else { |
||||
// this is ONLY valid if the uri was decoded using ISO-8859-1
|
||||
sb.append(String.format("%%%02X", (int) ch)); |
||||
} |
||||
} |
||||
|
||||
return sb.toString(); |
||||
} |
||||
|
||||
private HttpURLConnection createUrlConnection(String urlAsStr, int timeout, |
||||
boolean includeSomeGooseOptions) throws IOException { |
||||
URL url = new URL(urlAsStr); |
||||
//using proxy may increase latency
|
||||
HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY); |
||||
hConn.setRequestProperty("User-Agent", userAgent); |
||||
hConn.setRequestProperty("Accept", accept); |
||||
|
||||
if (includeSomeGooseOptions) { |
||||
hConn.setRequestProperty("Accept-Language", language); |
||||
hConn.setRequestProperty("content-charset", charset); |
||||
hConn.addRequestProperty("Referer", referrer); |
||||
// avoid the cache for testing purposes only?
|
||||
hConn.setRequestProperty("Cache-Control", cacheControl); |
||||
} |
||||
|
||||
// suggest respond to be gzipped or deflated (which is just another compression)
|
||||
// http://stackoverflow.com/q/3932117
|
||||
hConn.setRequestProperty("Accept-Encoding", "gzip, deflate"); |
||||
hConn.setConnectTimeout(timeout); |
||||
hConn.setReadTimeout(timeout); |
||||
return hConn; |
||||
} |
||||
|
||||
private JResult getFromCache(String url, String originalUrl) { |
||||
if (cache != null) { |
||||
JResult res = cache.get(url); |
||||
if (res != null) { |
||||
// e.g. the cache returned a shortened url as original url now we want to store the
|
||||
// current original url! Also it can be that the cache response to url but the JResult
|
||||
// does not contain it so overwrite it:
|
||||
res.setUrl(url); |
||||
res.setOriginalUrl(originalUrl); |
||||
cacheCounter.addAndGet(1); |
||||
return res; |
||||
} |
||||
} |
||||
return null; |
||||
} |
||||
} |
@ -1,31 +0,0 @@
@@ -1,31 +0,0 @@
|
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import org.jsoup.nodes.Element; |
||||
|
||||
/** |
||||
* Class which encapsulates the data from an image found under an element |
||||
* |
||||
* @author Chris Alexander, chris@chris-alexander.co.uk |
||||
*/ |
||||
class ImageResult { |
||||
|
||||
private final String src; |
||||
public final Integer weight; |
||||
private final String title; |
||||
private final int height; |
||||
private final int width; |
||||
private final String alt; |
||||
private final boolean noFollow; |
||||
public Element element; |
||||
|
||||
public ImageResult(String src, Integer weight, String title, int height, int width, String alt, |
||||
boolean noFollow) { |
||||
this.src = src; |
||||
this.weight = weight; |
||||
this.title = title; |
||||
this.height = height; |
||||
this.width = width; |
||||
this.alt = alt; |
||||
this.noFollow = noFollow; |
||||
} |
||||
} |
@ -1,274 +0,0 @@
@@ -1,274 +0,0 @@
|
||||
/* |
||||
* Copyright 2011 Peter Karich |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.Collection; |
||||
import java.util.Collections; |
||||
import java.util.List; |
||||
import java.util.ArrayList; |
||||
import java.util.Date; |
||||
import java.util.HashMap; |
||||
import java.util.Map; |
||||
|
||||
|
||||
/** |
||||
* Parsed result from web page containing important title, text and image. |
||||
* |
||||
* @author Peter Karich |
||||
*/ |
||||
public class JResult implements Serializable { |
||||
|
||||
private String title; |
||||
private String url; |
||||
private String originalUrl; |
||||
private String canonicalUrl; |
||||
private String imageUrl; |
||||
private String videoUrl; |
||||
private String rssUrl; |
||||
private String text; |
||||
private String faviconUrl; |
||||
private String description; |
||||
private String authorName; |
||||
private String authorDescription; |
||||
private Date date; |
||||
private Collection<String> keywords; |
||||
private List<ImageResult> images = null; |
||||
private final List<Map<String, String>> links = new ArrayList<>(); |
||||
private String type; |
||||
private String sitename; |
||||
private String language; |
||||
|
||||
public JResult() { |
||||
} |
||||
|
||||
public String getUrl() { |
||||
if (url == null) |
||||
return ""; |
||||
return url; |
||||
} |
||||
|
||||
public JResult setUrl(String url) { |
||||
this.url = url; |
||||
return this; |
||||
} |
||||
|
||||
public JResult setOriginalUrl(String originalUrl) { |
||||
this.originalUrl = originalUrl; |
||||
return this; |
||||
} |
||||
|
||||
public String getOriginalUrl() { |
||||
return originalUrl; |
||||
} |
||||
|
||||
public JResult setCanonicalUrl(String canonicalUrl) { |
||||
this.canonicalUrl = canonicalUrl; |
||||
return this; |
||||
} |
||||
|
||||
public String getCanonicalUrl() { |
||||
return canonicalUrl; |
||||
} |
||||
|
||||
public String getFaviconUrl() { |
||||
if (faviconUrl == null) |
||||
return ""; |
||||
return faviconUrl; |
||||
} |
||||
|
||||
public JResult setFaviconUrl(String faviconUrl) { |
||||
this.faviconUrl = faviconUrl; |
||||
return this; |
||||
} |
||||
|
||||
public JResult setRssUrl(String rssUrl) { |
||||
this.rssUrl = rssUrl; |
||||
return this; |
||||
} |
||||
|
||||
public String getRssUrl() { |
||||
if (rssUrl == null) |
||||
return ""; |
||||
return rssUrl; |
||||
} |
||||
|
||||
public String getDescription() { |
||||
if (description == null) |
||||
return ""; |
||||
return description; |
||||
} |
||||
|
||||
public JResult setDescription(String description) { |
||||
this.description = description; |
||||
return this; |
||||
} |
||||
|
||||
public String getAuthorName() { |
||||
if (authorName == null) |
||||
return ""; |
||||
return authorName; |
||||
} |
||||
|
||||
public JResult setAuthorName(String authorName) { |
||||
this.authorName = authorName; |
||||
return this; |
||||
} |
||||
|
||||
public String getAuthorDescription() { |
||||
if (authorDescription == null) |
||||
return ""; |
||||
return authorDescription; |
||||
} |
||||
|
||||
public JResult setAuthorDescription(String authorDescription) { |
||||
this.authorDescription = authorDescription; |
||||
return this; |
||||
} |
||||
|
||||
public String getImageUrl() { |
||||
if (imageUrl == null) |
||||
return ""; |
||||
return imageUrl; |
||||
} |
||||
|
||||
public JResult setImageUrl(String imageUrl) { |
||||
this.imageUrl = imageUrl; |
||||
return this; |
||||
} |
||||
|
||||
public String getText() { |
||||
if (text == null) |
||||
return ""; |
||||
|
||||
return text; |
||||
} |
||||
|
||||
public JResult setText(String text) { |
||||
this.text = text; |
||||
return this; |
||||
} |
||||
|
||||
public String getTitle() { |
||||
if (title == null) |
||||
return ""; |
||||
return title; |
||||
} |
||||
|
||||
public JResult setTitle(String title) { |
||||
this.title = title; |
||||
return this; |
||||
} |
||||
|
||||
public String getVideoUrl() { |
||||
if (videoUrl == null) |
||||
return ""; |
||||
return videoUrl; |
||||
} |
||||
|
||||
public JResult setVideoUrl(String videoUrl) { |
||||
this.videoUrl = videoUrl; |
||||
return this; |
||||
} |
||||
|
||||
public JResult setDate(Date date) { |
||||
this.date = date; |
||||
return this; |
||||
} |
||||
|
||||
public Collection<String> getKeywords() { |
||||
return keywords; |
||||
} |
||||
|
||||
public void setKeywords(Collection<String> keywords) { |
||||
this.keywords = keywords; |
||||
} |
||||
|
||||
/** |
||||
* @return get date from url or guessed from text |
||||
*/ |
||||
public Date getDate() { |
||||
return date; |
||||
} |
||||
|
||||
/** |
||||
* @return images list |
||||
*/ |
||||
public List<ImageResult> getImages() { |
||||
if (images == null) |
||||
return Collections.emptyList(); |
||||
return images; |
||||
} |
||||
|
||||
/** |
||||
* @return images count |
||||
*/ |
||||
public int getImagesCount() { |
||||
if (images == null) |
||||
return 0; |
||||
return images.size(); |
||||
} |
||||
|
||||
/** |
||||
* set images list |
||||
*/ |
||||
public void setImages(List<ImageResult> images) { |
||||
this.images = images; |
||||
} |
||||
|
||||
public void addLink(String url, String text, Integer pos) { |
||||
Map<String, String> link = new HashMap<>(); |
||||
link.put("url", url); |
||||
link.put("text", text); |
||||
link.put("offset", String.valueOf(pos)); |
||||
links.add(link); |
||||
} |
||||
|
||||
public List<Map<String, String>> getLinks() { |
||||
if (links == null) |
||||
return Collections.emptyList(); |
||||
return links; |
||||
} |
||||
|
||||
public String getType() { |
||||
return type; |
||||
} |
||||
|
||||
public void setType(String type) { |
||||
this.type = type; |
||||
} |
||||
|
||||
public String getSitename() { |
||||
return sitename; |
||||
} |
||||
|
||||
public void setSitename(String sitename) { |
||||
this.sitename = sitename; |
||||
} |
||||
|
||||
public String getLanguage() { |
||||
return language; |
||||
} |
||||
|
||||
public void setLanguage(String language) { |
||||
this.language = language; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return "title:" + getTitle() + " imageUrl:" + getImageUrl() + " text:" + text; |
||||
} |
||||
} |
@ -1,216 +0,0 @@
@@ -1,216 +0,0 @@
|
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
||||
|
||||
/** |
||||
* @author goose | jim |
||||
* @author karussell |
||||
* <p/> |
||||
* this class will be responsible for taking our top node and stripping out junk |
||||
* we don't want and getting it ready for how we want it presented to the user |
||||
*/ |
||||
public class OutputFormatter { |
||||
|
||||
private static final int MIN_FIRST_PARAGRAPH_TEXT = 50; // Min size of first paragraph
|
||||
private static final int MIN_PARAGRAPH_TEXT = 30; // Min size of any other paragraphs
|
||||
private static final List<String> NODES_TO_REPLACE = Arrays.asList("strong", "b", "i"); |
||||
private Pattern unlikelyPattern = Pattern.compile("display:none|visibility:hidden"); |
||||
private final int minFirstParagraphText; |
||||
private final int minParagraphText; |
||||
private final List<String> nodesToReplace; |
||||
private String nodesToKeepCssSelector = "p, ol"; |
||||
|
||||
public OutputFormatter() { |
||||
this(MIN_FIRST_PARAGRAPH_TEXT, MIN_PARAGRAPH_TEXT, NODES_TO_REPLACE); |
||||
} |
||||
|
||||
public OutputFormatter(int minParagraphText) { |
||||
this(minParagraphText, minParagraphText, NODES_TO_REPLACE); |
||||
} |
||||
|
||||
public OutputFormatter(int minFirstParagraphText, int minParagraphText) { |
||||
this(minFirstParagraphText, minParagraphText, NODES_TO_REPLACE); |
||||
} |
||||
|
||||
private OutputFormatter(int minFirstParagraphText, int minParagraphText, |
||||
List<String> nodesToReplace) { |
||||
this.minFirstParagraphText = minFirstParagraphText; |
||||
this.minParagraphText = minParagraphText; |
||||
this.nodesToReplace = nodesToReplace; |
||||
} |
||||
|
||||
/** |
||||
* set elements to keep in output text |
||||
*/ |
||||
public void setNodesToKeepCssSelector(String nodesToKeepCssSelector) { |
||||
this.nodesToKeepCssSelector = nodesToKeepCssSelector; |
||||
} |
||||
|
||||
/** |
||||
* takes an element and turns the P tags into \n\n |
||||
*/ |
||||
public String getFormattedText(Element topNode) { |
||||
setParagraphIndex(topNode, nodesToKeepCssSelector); |
||||
removeNodesWithNegativeScores(topNode); |
||||
StringBuilder sb = new StringBuilder(); |
||||
int countOfP = append(topNode, sb, nodesToKeepCssSelector); |
||||
String str = SHelper.innerTrim(sb.toString()); |
||||
|
||||
int topNodeLength = topNode.text().length(); |
||||
if (topNodeLength == 0) { |
||||
topNodeLength = 1; |
||||
} |
||||
|
||||
|
||||
boolean lowTextRatio = ((str.length() / (topNodeLength * 1.0)) < 0.25); |
||||
if (str.length() > 100 && countOfP > 0 && !lowTextRatio) |
||||
return str; |
||||
|
||||
// no subelements
|
||||
if (str.isEmpty() || (!topNode.text().isEmpty() |
||||
&& str.length() <= topNode.ownText().length()) |
||||
|| countOfP == 0 || lowTextRatio) { |
||||
str = topNode.text(); |
||||
} |
||||
|
||||
// if jsoup failed to parse the whole html now parse this smaller
|
||||
// snippet again to avoid html tags disturbing our text:
|
||||
return Jsoup.parse(str).text(); |
||||
} |
||||
|
||||
/** |
||||
* If there are elements inside our top node that have a negative gravity |
||||
* score remove them |
||||
*/ |
||||
private void removeNodesWithNegativeScores(Element topNode) { |
||||
Elements gravityItems = topNode.select("*[gravityScore]"); |
||||
for (Element item : gravityItems) { |
||||
int score = getScore(item); |
||||
int paragraphIndex = getParagraphIndex(item); |
||||
if (score < 0 || item.text().length() < getMinParagraph(paragraphIndex)) { |
||||
item.remove(); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private int append(Element node, StringBuilder sb, String tagName) { |
||||
int countOfP = 0; // Number of P elements in the article
|
||||
int paragraphWithTextIndex = 0; |
||||
// is select more costly then getElementsByTag?
|
||||
MAIN: |
||||
for (Element e : node.select(tagName)) { |
||||
Element tmpEl = e; |
||||
// check all elements until 'node'
|
||||
while (tmpEl != null && !tmpEl.equals(node)) { |
||||
if (unlikely(tmpEl)) |
||||
continue MAIN; |
||||
tmpEl = tmpEl.parent(); |
||||
} |
||||
|
||||
String text = node2Text(e); |
||||
if (text.isEmpty() || text.length() < getMinParagraph(paragraphWithTextIndex) |
||||
|| text.length() > SHelper.countLetters(text) * 2) { |
||||
continue; |
||||
} |
||||
|
||||
if (e.tagName().equals("p")) { |
||||
countOfP++; |
||||
} |
||||
|
||||
sb.append(text); |
||||
sb.append("\n\n"); |
||||
paragraphWithTextIndex += 1; |
||||
} |
||||
|
||||
return countOfP; |
||||
} |
||||
|
||||
private static void setParagraphIndex(Element node, String tagName) { |
||||
int paragraphIndex = 0; |
||||
for (Element e : node.select(tagName)) { |
||||
e.attr("paragraphIndex", Integer.toString(paragraphIndex++)); |
||||
} |
||||
} |
||||
|
||||
private int getMinParagraph(int paragraphIndex) { |
||||
if (paragraphIndex < 1) { |
||||
return minFirstParagraphText; |
||||
} else { |
||||
return minParagraphText; |
||||
} |
||||
} |
||||
|
||||
private static int getParagraphIndex(Element el) { |
||||
try { |
||||
return Integer.parseInt(el.attr("paragraphIndex")); |
||||
} catch (NumberFormatException ex) { |
||||
return -1; |
||||
} |
||||
} |
||||
|
||||
private static int getScore(Element el) { |
||||
try { |
||||
return Integer.parseInt(el.attr("gravityScore")); |
||||
} catch (Exception ex) { |
||||
return 0; |
||||
} |
||||
} |
||||
|
||||
private boolean unlikely(Node e) { |
||||
if (e.attr("class") != null && e.attr("class").toLowerCase().contains("caption")) |
||||
return true; |
||||
|
||||
String style = e.attr("style"); |
||||
String clazz = e.attr("class"); |
||||
return unlikelyPattern.matcher(style).find() || unlikelyPattern.matcher(clazz).find(); |
||||
} |
||||
|
||||
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) { |
||||
for (Node child : e.childNodes()) { |
||||
if (unlikely(child)) { |
||||
continue; |
||||
} |
||||
if (child instanceof TextNode) { |
||||
TextNode textNode = (TextNode) child; |
||||
String txt = textNode.text(); |
||||
accum.append(txt); |
||||
} else if (child instanceof Element) { |
||||
Element element = (Element) child; |
||||
if (accum.length() > 0 && element.isBlock() |
||||
&& !lastCharIsWhitespace(accum)) |
||||
accum.append(' '); |
||||
else if (element.tagName().equals("br")) |
||||
accum.append(' '); |
||||
appendTextSkipHidden(element, accum, indent + 1); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private static boolean lastCharIsWhitespace(StringBuilder accum) { |
||||
return accum.length() != 0 && Character.isWhitespace(accum.charAt(accum.length() - 1)); |
||||
} |
||||
|
||||
private String node2Text(Element el) { |
||||
StringBuilder sb = new StringBuilder(200); |
||||
appendTextSkipHidden(el, sb, 0); |
||||
return sb.toString(); |
||||
} |
||||
|
||||
private OutputFormatter setUnlikelyPattern(String unlikelyPattern) { |
||||
this.unlikelyPattern = Pattern.compile(unlikelyPattern); |
||||
return this; |
||||
} |
||||
|
||||
public OutputFormatter appendUnlikelyPattern(String str) { |
||||
return setUnlikelyPattern(unlikelyPattern.toString() + '|' + str); |
||||
} |
||||
} |
@ -1,29 +0,0 @@
@@ -1,29 +0,0 @@
|
||||
/* |
||||
* Copyright 2011 Peter Karich |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
/** |
||||
* |
||||
* @author Peter Karich |
||||
*/ |
||||
public interface SCache { |
||||
|
||||
JResult get(String url); |
||||
|
||||
void put(String url, JResult res); |
||||
|
||||
int getSize(); |
||||
} |
@ -1,451 +0,0 @@
@@ -1,451 +0,0 @@
|
||||
/* |
||||
* Copyright 2011 Peter Karich |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package org.purplei2p.lightning.reading; |
||||
|
||||
import org.jsoup.nodes.Element; |
||||
|
||||
import java.io.UnsupportedEncodingException; |
||||
import java.net.CookieHandler; |
||||
import java.net.CookieManager; |
||||
import java.net.CookiePolicy; |
||||
import java.net.MalformedURLException; |
||||
import java.net.URL; |
||||
import java.net.URLDecoder; |
||||
import java.net.URLEncoder; |
||||
import java.security.SecureRandom; |
||||
import java.security.cert.CertificateException; |
||||
import java.security.cert.X509Certificate; |
||||
import java.util.Date; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import javax.net.ssl.KeyManager; |
||||
import javax.net.ssl.SSLContext; |
||||
import javax.net.ssl.TrustManager; |
||||
import javax.net.ssl.X509TrustManager; |
||||
|
||||
/** |
||||
* @author Peter Karich |
||||
*/ |
||||
class SHelper { |
||||
|
||||
private static final String UTF8 = "UTF-8"; |
||||
private static final Pattern SPACE = Pattern.compile(" "); |
||||
|
||||
public static String replaceSpaces(String url) { |
||||
if (!url.isEmpty()) { |
||||
url = url.trim(); |
||||
if (url.contains(" ")) { |
||||
Matcher spaces = SPACE.matcher(url); |
||||
url = spaces.replaceAll("%20"); |
||||
} |
||||
} |
||||
return url; |
||||
} |
||||
|
||||
public static int count(String str, String substring) { |
||||
int c = 0; |
||||
int index1 = str.indexOf(substring); |
||||
if (index1 >= 0) { |
||||
c++; |
||||
c += count(str.substring(index1 + substring.length()), substring); |
||||
} |
||||
return c; |
||||
} |
||||
|
||||
/** |
||||
* remove more than two spaces or newlines |
||||
*/ |
||||
public static String innerTrim(String str) { |
||||
if (str.isEmpty()) |
||||
return ""; |
||||
|
||||
StringBuilder sb = new StringBuilder(str.length()); |
||||
boolean previousSpace = false; |
||||
for (int i = 0, length = str.length(); i < length; i++) { |
||||
char c = str.charAt(i); |
||||
if (c == ' ' || (int) c == 9 || c == '\n') { |
||||
previousSpace = true; |
||||
continue; |
||||
} |
||||
|
||||
if (previousSpace) |
||||
sb.append(' '); |
||||
|
||||
previousSpace = false; |
||||
sb.append(c); |
||||
} |
||||
return sb.toString().trim(); |
||||
} |
||||
|
||||
/** |
||||
* Starts reading the encoding from the first valid character until an |
||||
* invalid encoding character occurs. |
||||
*/ |
||||
public static String encodingCleanup(String str) { |
||||
StringBuilder sb = new StringBuilder(str.length()); |
||||
boolean startedWithCorrectString = false; |
||||
for (int i = 0; i < str.length(); i++) { |
||||
char c = str.charAt(i); |
||||
if (Character.isDigit(c) || Character.isLetter(c) || c == '-' || c == '_') { |
||||
startedWithCorrectString = true; |
||||
sb.append(c); |
||||
continue; |
||||
} |
||||
|
||||
if (startedWithCorrectString) |
||||
break; |
||||
} |
||||
return sb.toString().trim(); |
||||
} |
||||
|
||||
/** |
||||
* @return the longest substring as str1.substring(result[0], result[1]); |
||||
*/ |
||||
public static String getLongestSubstring(String str1, String str2) { |
||||
int res[] = longestSubstring(str1, str2); |
||||
if (res == null || res[0] >= res[1]) |
||||
return ""; |
||||
|
||||
return str1.substring(res[0], res[1]); |
||||
} |
||||
|
||||
private static int[] longestSubstring(String str1, String str2) { |
||||
if (str1 == null || str1.isEmpty() || str2 == null || str2.isEmpty()) |
||||
return null; |
||||
|
||||
// dynamic programming => save already identical length into array
|
||||
// to understand this algo simply print identical length in every entry of the array
|
||||
// i+1, j+1 then reuses information from i,j
|
||||
// java initializes them already with 0
|
||||
int[][] num = new int[str1.length()][str2.length()]; |
||||
int maxlen = 0; |
||||
int lastSubstrBegin = 0; |
||||
int endIndex = 0; |
||||
for (int i = 0; i < str1.length(); i++) { |
||||
for (int j = 0; j < str2.length(); j++) { |
||||
if (str1.charAt(i) == str2.charAt(j)) { |
||||
if ((i == 0) || (j == 0)) |
||||
num[i][j] = 1; |
||||
else |
||||
num[i][j] = 1 + num[i - 1][j - 1]; |
||||
|
||||
if (num[i][j] > maxlen) { |
||||
maxlen = num[i][j]; |
||||
// generate substring from str1 => i
|
||||
lastSubstrBegin = i - num[i][j] + 1; |
||||
endIndex = i + 1; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
return new int[]{lastSubstrBegin, endIndex}; |
||||
} |
||||
|
||||
public static String getDefaultFavicon(String url) { |
||||
return useDomainOfFirstArg4Second(url, "/favicon.ico"); |
||||
} |
||||
|
||||
/** |
||||
* @param urlForDomain extract the domain from this url |
||||
* @param path this url does not have a domain |
||||
* @return |
||||
*/ |
||||
public static String useDomainOfFirstArg4Second(String urlForDomain, String path) { |
||||
try { |
||||
// See: http://stackoverflow.com/questions/1389184/building-an-absolute-url-from-a-relative-url-in-java
|
||||
URL baseUrl = new URL(urlForDomain); |
||||
URL relativeurl = new URL(baseUrl, path); |
||||
return relativeurl.toString(); |
||||
} catch (MalformedURLException ex) { |
||||
return path; |
||||
} |
||||
} |
||||
|
||||
public static String extractHost(String url) { |
||||
return extractDomain(url, false); |
||||
} |
||||
|
||||
public static String extractDomain(String url, boolean aggressive) { |
||||
if (url.startsWith("http://")) |
||||
url = url.substring("http://".length()); |
||||
else if (url.startsWith("https://")) |
||||
url = url.substring("https://".length()); |
||||
|
||||
if (aggressive) { |
||||
if (url.startsWith("www.")) |
||||
url = url.substring("www.".length()); |
||||
|
||||
// strip mobile from start
|
||||
if (url.startsWith("m.")) |
||||
url = url.substring("m.".length()); |
||||
} |
||||
|
||||
int slashIndex = url.indexOf('/'); |
||||
if (slashIndex > 0) |
||||
url = url.substring(0, slashIndex); |
||||
|
||||
return url; |
||||
} |
||||
|
||||
public static boolean isVideoLink(String url) { |
||||
url = extractDomain(url, true); |
||||
return url.startsWith("youtube.com") || url.startsWith("video.yahoo.com") |
||||
|| url.startsWith("vimeo.com") || url.startsWith("blip.tv"); |
||||
} |
||||
|
||||
public static boolean isVideo(String url) { |
||||
return url.endsWith(".mpeg") || url.endsWith(".mpg") || url.endsWith(".avi") || url.endsWith(".mov") |
||||
|| url.endsWith(".mpg4") || url.endsWith(".mp4") || url.endsWith(".flv") || url.endsWith(".wmv"); |
||||
} |
||||
|
||||
public static boolean isAudio(String url) { |
||||
return url.endsWith(".mp3") || url.endsWith(".ogg") || url.endsWith(".m3u") || url.endsWith(".wav"); |
||||
} |
||||
|
||||
public static boolean isDoc(String url) { |
||||
return url.endsWith(".pdf") || url.endsWith(".ppt") || url.endsWith(".doc") |
||||
|| url.endsWith(".swf") || url.endsWith(".rtf") || url.endsWith(".xls"); |
||||
} |
||||
|
||||
public static boolean isPackage(String url) { |
||||
return url.endsWith(".gz") || url.endsWith(".tgz") || url.endsWith(".zip") |
||||
|| url.endsWith(".rar") || url.endsWith(".deb") || url.endsWith(".rpm") || url.endsWith(".7z"); |
||||
} |
||||
|
||||
public static boolean isApp(String url) { |
||||
return url.endsWith(".exe") || url.endsWith(".bin") || url.endsWith(".bat") || url.endsWith(".dmg"); |
||||
} |
||||
|
||||
public static boolean isImage(String url) { |
||||
return url.endsWith(".png") || url.endsWith(".jpeg") || url.endsWith(".gif") |
||||
|| url.endsWith(".jpg") || url.endsWith(".bmp") || url.endsWith(".ico") || url.endsWith(".eps"); |
||||
} |
||||
|
||||
/** |
||||
* @see "http://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se" |
||||
*/ |
||||
public static void enableCookieMgmt() { |
||||
CookieManager manager = new CookieManager(); |
||||
manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL); |
||||
CookieHandler.setDefault(manager); |
||||
} |
||||
|
||||
/** |
||||
* @see "http://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java-urlconnection" |
||||
*/ |
||||
public static void enableUserAgentOverwrite() { |
||||
System.setProperty("http.agent", ""); |
||||
} |
||||
|
||||
public static String getUrlFromUglyGoogleRedirect(String url) { |
||||
if (url.startsWith("https://www.google.com/url?")) { |
||||
url = url.substring("https://www.google.com/url?".length()); |
||||
String arr[] = urlDecode(url).split("&"); |
||||
for (String str : arr) { |
||||
if (str.startsWith("q=")) |
||||
return str.substring("q=".length()); |
||||
} |
||||
} |
||||
|
||||
return null; |
||||
} |
||||
|
||||
public static String getUrlFromUglyFacebookRedirect(String url) { |
||||
if (url.startsWith("https://www.facebook.com/l.php?u=")) { |
||||
url = url.substring("https://www.facebook.com/l.php?u=".length()); |
||||
return urlDecode(url); |
||||
} |
||||
|
||||
return null; |
||||
} |
||||
|
||||
public static String urlEncode(String str) { |
||||
try { |
||||
return URLEncoder.encode(str, UTF8); |
||||
} catch (UnsupportedEncodingException ex) { |
||||
return str; |
||||
} |
||||
} |
||||
|
||||
private static String urlDecode(String str) { |
||||
try { |
||||
return URLDecoder.decode(str, UTF8); |
||||
} catch (UnsupportedEncodingException ex) { |
||||
return str; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Popular sites uses the #! to indicate the importance of the following |
||||
* chars. Ugly but true. Such as: facebook, twitter, gizmodo, ... |
||||
*/ |
||||
public static String removeHashbang(String url) { |
||||
return url.replaceFirst("#!", ""); |
||||
} |
||||
|
||||
public static String printNode(Element root) { |
||||
return printNode(root, 0); |
||||
} |
||||
|
||||
private static String printNode(Element root, int indentation) { |
||||
StringBuilder sb = new StringBuilder(indentation); |
||||
for (int i = 0; i < indentation; i++) { |
||||
sb.append(' '); |
||||
} |
||||
sb.append(root.tagName()); |
||||
sb.append(':'); |
||||
sb.append(root.ownText()); |
||||
sb.append('\n'); |
||||
for (Element el : root.children()) { |
||||
sb.append(printNode(el, indentation + 1)); |
||||
sb.append('\n'); |
||||
} |
||||
return sb.toString(); |
||||
} |
||||
|
||||
public static String estimateDate(String url) { |
||||
int index = url.indexOf("://"); |
||||
if (index > 0) |
||||
url = url.substring(index + 3); |
||||
|
||||
int year = -1; |
||||
int yearCounter = -1; |
||||
int month = -1; |
||||
int monthCounter = -1; |
||||
int day = -1; |
||||
String strs[] = url.split("/"); |
||||
for (int counter = 0; counter < strs.length; counter++) { |
||||
String str = strs[counter]; |
||||
if (str.length() == 4) { |
||||
try { |
||||
year = Integer.parseInt(str); |
||||
} catch (Exception ex) { |
||||
continue; |
||||
} |
||||
if (year < 1970 || year > 3000) { |
||||
year = -1; |
||||
continue; |
||||
} |
||||
yearCounter = counter; |
||||
} else if (str.length() == 2) { |
||||
if (monthCounter < 0 && counter == yearCounter + 1) { |
||||
try { |
||||
month = Integer.parseInt(str); |
||||
} catch (Exception ex) { |
||||
continue; |
||||
} |
||||
if (month < 1 || month > 12) { |
||||
month = -1; |
||||
continue; |
||||
} |
||||
monthCounter = counter; |
||||
} else if (counter == monthCounter + 1) { |
||||
try { |
||||
day = Integer.parseInt(str); |
||||
} catch (Exception ignored) { |
||||
// ignored
|
||||
} |
||||
if (day < 1 || day > 31) { |
||||
day = -1; |
||||
continue; |
||||
} |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (year < 0) |
||||
return null; |
||||
|
||||
StringBuilder str = new StringBuilder(year); |
||||
if (month < 1) |
||||
return str.toString(); |
||||
|
||||
str.append('/'); |
||||
if (month < 10) |
||||
str.append('0'); |
||||
str.append(month); |
||||
if (day < 1) |
||||
return str.toString(); |
||||
|
||||
str.append('/'); |
||||
if (day < 10) |
||||
str.append('0'); |
||||
str.append(day); |
||||
return str.toString(); |
||||
} |
||||
|
||||
public static String completeDate(String dateStr) { |
||||
if (dateStr == null) |
||||
return null; |
||||
|
||||
int index = dateStr.indexOf('/'); |
||||
if (index > 0) { |
||||
index = dateStr.indexOf('/', index + 1); |
||||
if (index > 0) |
||||
return dateStr; |
||||
else |
||||
return dateStr + "/01"; |
||||
} |
||||
return dateStr + "/01/01"; |
||||
} |
||||
|
||||
// with the help of http://stackoverflow.com/questions/1828775/httpclient-and-ssl
|
||||
public static void enableAnySSL() { |
||||
try { |
||||
SSLContext ctx = SSLContext.getInstance("TLS"); |
||||
ctx.init(new KeyManager[0], new TrustManager[]{new DefaultTrustManager()}, new SecureRandom()); |
||||
SSLContext.setDefault(ctx); |
||||
} catch (Exception ex) { |
||||
ex.printStackTrace(); |
||||
} |
||||
} |
||||
|
||||
private static class DefaultTrustManager implements X509TrustManager { |
||||
|
||||
@Override |
||||
public void checkClientTrusted(X509Certificate[] certs, String arg1) throws CertificateException { |
||||
Date today = new Date(); |
||||
for (X509Certificate certificate : certs) { |
||||
certificate.checkValidity(today); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public void checkServerTrusted(X509Certificate[] certs, String arg1) throws CertificateException { |
||||
Date today = new Date(); |
||||
for (X509Certificate certificate : certs) { |
||||
certificate.checkValidity(today); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public X509Certificate[] getAcceptedIssuers() { |
||||
return null; |
||||
} |
||||
} |
||||
|
||||
public static int countLetters(String str) { |
||||
int len = str.length(); |
||||
int chars = 0; |
||||
for (int i = 0; i < len; i++) { |
||||
if (Character.isLetter(str.charAt(i))) |
||||
chars++; |
||||
} |
||||
return chars; |
||||
} |
||||
} |
@ -1,342 +0,0 @@
@@ -1,342 +0,0 @@
|
||||
package org.purplei2p.lightning.reading.activity; |
||||
|
||||
import android.animation.ObjectAnimator; |
||||
import android.app.Dialog; |
||||
import android.app.ProgressDialog; |
||||
import android.content.DialogInterface; |
||||
import android.content.DialogInterface.OnClickListener; |
||||
import android.content.Intent; |
||||
import android.graphics.PorterDuff; |
||||
import android.graphics.drawable.ColorDrawable; |
||||
import android.os.Bundle; |
||||
import android.support.annotation.NonNull; |
||||
import android.support.annotation.Nullable; |
||||
import android.support.v7.app.AlertDialog; |
||||
import android.support.v7.app.AppCompatActivity; |
||||
import android.support.v7.widget.Toolbar; |
||||
import android.util.Log; |
||||
import android.view.LayoutInflater; |
||||
import android.view.Menu; |
||||
import android.view.MenuItem; |
||||
import android.view.View; |
||||
import android.widget.SeekBar; |
||||
import android.widget.SeekBar.OnSeekBarChangeListener; |
||||
import android.widget.TextView; |
||||
|
||||
import javax.inject.Inject; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.BrowserApp; |
||||
import org.purplei2p.lightning.constant.Constants; |
||||
import org.purplei2p.lightning.dialog.BrowserDialog; |
||||
import org.purplei2p.lightning.preference.PreferenceManager; |
||||
|
||||
import com.anthonycr.bonsai.Schedulers; |
||||
import com.anthonycr.bonsai.Single; |
||||
import com.anthonycr.bonsai.SingleAction; |
||||
import com.anthonycr.bonsai.SingleOnSubscribe; |
||||
import com.anthonycr.bonsai.SingleSubscriber; |
||||
import com.anthonycr.bonsai.Subscription; |
||||
|
||||
import org.purplei2p.lightning.reading.HtmlFetcher; |
||||
import org.purplei2p.lightning.reading.JResult; |
||||
import org.purplei2p.lightning.utils.ThemeUtils; |
||||
import org.purplei2p.lightning.utils.Utils; |
||||
import butterknife.BindView; |
||||
import butterknife.ButterKnife; |
||||
|
||||
public class ReadingActivity extends AppCompatActivity { |
||||
|
||||
private static final String TAG = "ReadingActivity"; |
||||
|
||||
@BindView(R.id.textViewTitle) TextView mTitle; |
||||
@BindView(R.id.textViewBody) TextView mBody; |
||||
|
||||
@Inject PreferenceManager mPreferences; |
||||
|
||||
private boolean mInvert; |
||||
private String mUrl = null; |
||||
private int mTextSize; |
||||
private ProgressDialog mProgressDialog; |
||||
private Subscription mPageLoaderSubscription; |
||||
|
||||
private static final float XXLARGE = 30.0f; |
||||
private static final float XLARGE = 26.0f; |
||||
private static final float LARGE = 22.0f; |
||||
private static final float MEDIUM = 18.0f; |
||||
private static final float SMALL = 14.0f; |
||||
private static final float XSMALL = 10.0f; |
||||
|
||||
@Override |
||||
protected void onCreate(Bundle savedInstanceState) { |
||||
BrowserApp.getAppComponent().inject(this); |
||||
|
||||
overridePendingTransition(R.anim.slide_in_from_right, R.anim.fade_out_scale); |
||||
mInvert = mPreferences.getInvertColors(); |
||||
final int color; |
||||
if (mInvert) { |
||||
setTheme(R.style.Theme_SettingsTheme_Dark); |
||||
color = ThemeUtils.getPrimaryColorDark(this); |
||||
getWindow().setBackgroundDrawable(new ColorDrawable(color)); |
||||
} else { |
||||
setTheme(R.style.Theme_SettingsTheme); |
||||
color = ThemeUtils.getPrimaryColor(this); |
||||
getWindow().setBackgroundDrawable(new ColorDrawable(color)); |
||||
} |
||||
super.onCreate(savedInstanceState); |
||||
setContentView(R.layout.reading_view); |
||||
ButterKnife.bind(this); |
||||
|
||||
Toolbar toolbar = (Toolbar) findViewById(R.id.toolbar); |
||||
setSupportActionBar(toolbar); |
||||
|
||||
if (getSupportActionBar() != null) |
||||
getSupportActionBar().setDisplayHomeAsUpEnabled(true); |
||||
|
||||
mTextSize = mPreferences.getReadingTextSize(); |
||||
mBody.setTextSize(getTextSize(mTextSize)); |
||||
mTitle.setText(getString(R.string.untitled)); |
||||
mBody.setText(getString(R.string.loading)); |
||||
|
||||
mTitle.setVisibility(View.INVISIBLE); |
||||
mBody.setVisibility(View.INVISIBLE); |
||||
|
||||
Intent intent = getIntent(); |
||||
if (!loadPage(intent)) { |
||||
setText(getString(R.string.untitled), getString(R.string.loading_failed)); |
||||
} |
||||
} |
||||
|
||||
private static float getTextSize(int size) { |
||||
switch (size) { |
||||
case 0: |
||||
return XSMALL; |
||||
case 1: |
||||
return SMALL; |
||||
case 2: |
||||
return MEDIUM; |
||||
case 3: |
||||
return LARGE; |
||||
case 4: |
||||
return XLARGE; |
||||
case 5: |
||||
return XXLARGE; |
||||
default: |
||||
return MEDIUM; |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public boolean onCreateOptionsMenu(Menu menu) { |
||||
getMenuInflater().inflate(R.menu.reading, menu); |
||||
MenuItem invert = menu.findItem(R.id.invert_item); |
||||
MenuItem textSize = menu.findItem(R.id.text_size_item); |
||||
|
||||
int iconColor = ThemeUtils.getIconThemeColor(this, mInvert); |
||||
|
||||
if (invert != null && invert.getIcon() != null) { |
||||
invert.getIcon().mutate().setColorFilter(iconColor, PorterDuff.Mode.SRC_IN); |
||||
} |
||||
|
||||
if (textSize != null && textSize.getIcon() != null) { |
||||
textSize.getIcon().mutate().setColorFilter(iconColor, PorterDuff.Mode.SRC_IN); |
||||
} |
||||
|
||||
return super.onCreateOptionsMenu(menu); |
||||
} |
||||
|
||||
private boolean loadPage(Intent intent) { |
||||
if (intent == null) { |
||||
return false; |
||||
} |
||||
mUrl = intent.getStringExtra(Constants.LOAD_READING_URL); |
||||
if (mUrl == null) { |
||||
return false; |
||||
} |
||||
if (getSupportActionBar() != null) |
||||
getSupportActionBar().setTitle(Utils.getDomainName(mUrl)); |
||||
mPageLoaderSubscription = loadPage(mUrl).subscribeOn(Schedulers.worker()) |
||||
.observeOn(Schedulers.main()) |
||||
.subscribe(new SingleOnSubscribe<ReaderInfo>() { |
||||
@Override |
||||
public void onStart() { |
||||
mProgressDialog = new ProgressDialog(ReadingActivity.this); |
||||
mProgressDialog.setProgressStyle(ProgressDialog.STYLE_SPINNER); |
||||
mProgressDialog.setCancelable(false); |
||||
mProgressDialog.setIndeterminate(true); |
||||
mProgressDialog.setMessage(getString(R.string.loading)); |
||||
mProgressDialog.show(); |
||||
BrowserDialog.setDialogSize(ReadingActivity.this, mProgressDialog); |
||||
} |
||||
|
||||
@Override |
||||
public void onItem(@Nullable ReaderInfo item) { |
||||
if (item == null || item.getTitle().isEmpty() || item.getBody().isEmpty()) { |
||||
setText(getString(R.string.untitled), getString(R.string.loading_failed)); |
||||
} else { |
||||
setText(item.getTitle(), item.getBody()); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public void onError(@NonNull Throwable throwable) { |
||||
setText(getString(R.string.untitled), getString(R.string.loading_failed)); |
||||
if (mProgressDialog != null && mProgressDialog.isShowing()) { |
||||
mProgressDialog.dismiss(); |
||||
mProgressDialog = null; |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public void onComplete() { |
||||
if (mProgressDialog != null && mProgressDialog.isShowing()) { |
||||
mProgressDialog.dismiss(); |
||||
mProgressDialog = null; |
||||
} |
||||
} |
||||
}); |
||||
return true; |
||||
} |
||||
|
||||
private static Single<ReaderInfo> loadPage(@NonNull final String url) { |
||||
return Single.create(new SingleAction<ReaderInfo>() { |
||||
@Override |
||||
public void onSubscribe(@NonNull SingleSubscriber<ReaderInfo> subscriber) { |
||||
HtmlFetcher fetcher = new HtmlFetcher(); |
||||
try { |
||||
JResult result = fetcher.fetchAndExtract(url, 2500, true); |
||||
subscriber.onItem(new ReaderInfo(result.getTitle(), result.getText())); |
||||
} catch (Exception e) { |
||||
subscriber.onError(new Throwable("Encountered exception")); |
||||
Log.e(TAG, "Error parsing page", e); |
||||
} catch (OutOfMemoryError e) { |
||||
System.gc(); |
||||
subscriber.onError(new Throwable("Out of memory")); |
||||
Log.e(TAG, "Out of memory", e); |
||||
} |
||||
subscriber.onComplete(); |
||||
} |
||||
}); |
||||
} |
||||
|
||||
private static class ReaderInfo { |
||||
@NonNull private final String mTitleText; |
||||
@NonNull private final String mBodyText; |
||||
|
||||
public ReaderInfo(@NonNull String title, @NonNull String body) { |
||||
mTitleText = title; |
||||
mBodyText = body; |
||||
} |
||||
|
||||
@NonNull |
||||
public String getTitle() { |
||||
return mTitleText; |
||||
} |
||||
|
||||
@NonNull |
||||
public String getBody() { |
||||
return mBodyText; |
||||
} |
||||
} |
||||
|
||||
private void setText(String title, String body) { |
||||
if (mTitle == null || mBody == null) |
||||
return; |
||||
if (mTitle.getVisibility() == View.INVISIBLE) { |
||||
mTitle.setAlpha(0.0f); |
||||
mTitle.setVisibility(View.VISIBLE); |
||||
mTitle.setText(title); |
||||
ObjectAnimator animator = ObjectAnimator.ofFloat(mTitle, "alpha", 1.0f); |
||||
animator.setDuration(300); |
||||
animator.start(); |
||||
} else { |
||||
mTitle.setText(title); |
||||
} |
||||
|
||||
if (mBody.getVisibility() == View.INVISIBLE) { |
||||
mBody.setAlpha(0.0f); |
||||
mBody.setVisibility(View.VISIBLE); |
||||
mBody.setText(body); |
||||
ObjectAnimator animator = ObjectAnimator.ofFloat(mBody, "alpha", 1.0f); |
||||
animator.setDuration(300); |
||||
animator.start(); |
||||
} else { |
||||
mBody.setText(body); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
protected void onDestroy() { |
||||
mPageLoaderSubscription.unsubscribe(); |
||||
|
||||
if (mProgressDialog != null && mProgressDialog.isShowing()) { |
||||
mProgressDialog.dismiss(); |
||||
mProgressDialog = null; |
||||
} |
||||
super.onDestroy(); |
||||
} |
||||
|
||||
@Override |
||||
protected void onPause() { |
||||
super.onPause(); |
||||
if (isFinishing()) { |
||||
overridePendingTransition(R.anim.fade_in_scale, R.anim.slide_out_to_right); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public boolean onOptionsItemSelected(MenuItem item) { |
||||
switch (item.getItemId()) { |
||||
case R.id.invert_item: |
||||
mPreferences.setInvertColors(!mInvert); |
||||
Intent read = new Intent(this, ReadingActivity.class); |
||||
read.putExtra(Constants.LOAD_READING_URL, mUrl); |
||||
startActivity(read); |
||||
finish(); |
||||
break; |
||||
case R.id.text_size_item: |
||||
|
||||
View view = LayoutInflater.from(this).inflate(R.layout.dialog_seek_bar, null); |
||||
final SeekBar bar = view.findViewById(R.id.text_size_seekbar); |
||||
bar.setOnSeekBarChangeListener(new OnSeekBarChangeListener() { |
||||
|
||||
@Override |
||||
public void onProgressChanged(SeekBar view, int size, boolean user) { |
||||
mBody.setTextSize(getTextSize(size)); |
||||
} |
||||
|
||||
@Override |
||||
public void onStartTrackingTouch(SeekBar arg0) { |
||||
} |
||||
|
||||
@Override |
||||
public void onStopTrackingTouch(SeekBar arg0) { |
||||
} |
||||
|
||||
}); |
||||
bar.setMax(5); |
||||
bar.setProgress(mTextSize); |
||||
|
||||
AlertDialog.Builder builder = new AlertDialog.Builder(this) |
||||
.setView(view) |
||||
.setTitle(R.string.size) |
||||
.setPositiveButton(android.R.string.ok, new OnClickListener() { |
||||
|
||||
@Override |
||||
public void onClick(DialogInterface dialog, int arg1) { |
||||
mTextSize = bar.getProgress(); |
||||
mBody.setTextSize(getTextSize(mTextSize)); |
||||
mPreferences.setReadingTextSize(bar.getProgress()); |
||||
} |
||||
|
||||
}); |
||||
Dialog dialog = builder.show(); |
||||
BrowserDialog.setDialogSize(this, dialog); |
||||
break; |
||||
default: |
||||
finish(); |
||||
break; |
||||
} |
||||
return super.onOptionsItemSelected(item); |
||||
} |
||||
} |
@ -1,35 +0,0 @@
@@ -1,35 +0,0 @@
|
||||
package org.purplei2p.lightning.search |
||||
|
||||
import org.purplei2p.lightning.database.HistoryItem |
||||
import org.purplei2p.lightning.search.suggestions.DuckSuggestionsModel |
||||
import org.purplei2p.lightning.search.suggestions.LegworkSuggestionsModel |
||||
import android.app.Application |
||||
import com.anthonycr.bonsai.Single |
||||
import com.anthonycr.bonsai.SingleAction |
||||
|
||||
/**
 * Creates the [Single]s that fetch search suggestions and tracks whether
 * one of them is currently fetching.
 */
internal object SuggestionsManager {

    /**
     * `true` from the moment a suggestions request starts executing until it
     * has delivered its result or failed.
     */
    @JvmStatic
    @Volatile var isRequestInProgress: Boolean = false

    /**
     * Creates a [Single] that fetches Legwork suggestions for [query] when
     * subscribed to.
     */
    @JvmStatic
    fun createLegworkQueryObservable(query: String, application: Application) =
            createQueryObservable { LegworkSuggestionsModel(application).fetchResults(query) }

    /**
     * Creates a [Single] that fetches DuckDuckGo suggestions for [query] when
     * subscribed to.
     */
    @JvmStatic
    fun createDuckQueryObservable(query: String, application: Application) =
            createQueryObservable { DuckSuggestionsModel(application).fetchResults(query) }

    /**
     * Wraps [fetch] in a [Single] that emits its result and completes.
     *
     * The in-progress flag is reset in a `finally` block so that a failure in
     * [fetch] or in a subscriber callback cannot leave it stuck at `true`.
     */
    private fun createQueryObservable(fetch: () -> List<HistoryItem>) =
            Single.create(SingleAction<List<HistoryItem>> { subscriber ->
                isRequestInProgress = true
                try {
                    val results = fetch()
                    subscriber.onItem(results)
                    subscriber.onComplete()
                } finally {
                    isRequestInProgress = false
                }
            })

}
@ -0,0 +1,51 @@
@@ -0,0 +1,51 @@
|
||||
package org.purplei2p.lightning.search.engine; |
||||
|
||||
import android.support.annotation.NonNull; |
||||
import android.support.annotation.StringRes; |
||||
|
||||
import org.purplei2p.lightning.utils.Preconditions; |
||||
|
||||
/** |
||||
* A class representative of a search engine. |
||||
* <p> |
||||
* Contains three key pieces of information: |
||||
* <ul> |
||||
* <li>The icon shown for the search engine, should point to a local assets URL.</li> |
||||
* <li>The query URL for the search engine, the query will be appended to the end.</li> |
||||
* <li>The title string resource for the search engine.</li> |
||||
* </ul> |
||||
*/ |
||||
public class BaseSearchEngine { |
||||
|
||||
@NonNull private final String mIconUrl; |
||||
@NonNull private final String mQueryUrl; |
||||
@StringRes private final int mTitleRes; |
||||
|
||||
BaseSearchEngine(@NonNull String iconUrl, |
||||
@NonNull String queryUrl, |
||||
@StringRes int titleRes) { |
||||
|
||||
Preconditions.checkNonNull(iconUrl); |
||||
Preconditions.checkNonNull(queryUrl); |
||||
|
||||
mIconUrl = iconUrl; |
||||
mQueryUrl = queryUrl; |
||||
mTitleRes = titleRes; |
||||
} |
||||
|
||||
@NonNull |
||||
public final String getIconUrl() { |
||||
return mIconUrl; |
||||
} |
||||
|
||||
@NonNull |
||||
public final String getQueryUrl() { |
||||
return mQueryUrl; |
||||
} |
||||
|
||||
@StringRes |
||||
public final int getTitleRes() { |
||||
return mTitleRes; |
||||
} |
||||
|
||||
} |
@ -1,15 +0,0 @@
@@ -1,15 +0,0 @@
|
||||
package org.purplei2p.lightning.search.engine |
||||
|
||||
import android.support.annotation.StringRes |
||||
|
||||
/**
 * Describes a single search engine.
 *
 * @property iconUrl the icon shown for the search engine, should point to a local assets URL.
 * @property queryUrl the query URL for the search engine, the query will be appended to the end.
 * @property titleRes the title string resource for the search engine.
 */
open class BaseSearchEngine internal constructor(
        val iconUrl: String,
        val queryUrl: String,
        @StringRes val titleRes: Int
)
@ -0,0 +1,16 @@
@@ -0,0 +1,16 @@
|
||||
package org.purplei2p.lightning.search.engine; |
||||
|
||||
import android.support.annotation.NonNull; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
|
||||
/** |
||||
* A custom search engine. |
||||
*/ |
||||
public class CustomSearch extends BaseSearchEngine { |
||||
|
||||
public CustomSearch(@NonNull String queryUrl) { |
||||
super("file:///android_asset/lightning.png", queryUrl, R.string.search_engine_custom); |
||||
} |
||||
|
||||
} |
@ -1,12 +0,0 @@
@@ -1,12 +0,0 @@
|
||||
package org.purplei2p.lightning.search.engine |
||||
|
||||
import org.purplei2p.lightning.R |
||||
|
||||
/**
 * A search engine whose query URL is supplied by the caller.
 *
 * @param queryUrl the URL that queries are appended to.
 */
class CustomSearch(queryUrl: String) : BaseSearchEngine(
        iconUrl = "file:///android_asset/lightning.png",
        queryUrl = queryUrl,
        titleRes = R.string.search_engine_custom
)
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
package org.purplei2p.lightning.search.engine; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.constant.Constants; |
||||
|
||||
/** |
||||
* The DuckDuckGo Lite search engine. |
||||
* <p> |
||||
* See https://duckduckgo.com/assets/logo_homepage.normal.v101.png for the icon.
|
||||
*/ |
||||
public class DuckLiteSearch extends BaseSearchEngine { |
||||
|
||||
public DuckLiteSearch() { |
||||
super("file:///android_asset/duckduckgo.png", Constants.DUCK_LITE_SEARCH, R.string.search_engine_duckduckgo_lite); |
||||
} |
||||
|
||||
} |
@ -1,15 +0,0 @@
@@ -1,15 +0,0 @@
|
||||
package org.purplei2p.lightning.search.engine |
||||
|
||||
import org.purplei2p.lightning.R |
||||
import org.purplei2p.lightning.constant.Constants |
||||
|
||||
/**
 * The DuckDuckGo Lite search engine.
 *
 * The icon is taken from https://duckduckgo.com/assets/logo_homepage.normal.v101.png.
 */
class DuckLiteSearch : BaseSearchEngine(
        iconUrl = "file:///android_asset/duckduckgo.png",
        queryUrl = Constants.DUCK_LITE_SEARCH,
        titleRes = R.string.search_engine_duckduckgo_lite
)
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
package org.purplei2p.lightning.search.engine; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.constant.Constants; |
||||
|
||||
/** |
||||
* The DuckDuckGo search engine. |
||||
* <p> |
||||
* See https://duckduckgo.com/assets/logo_homepage.normal.v101.png for the icon.
|
||||
*/ |
||||
public class DuckSearch extends BaseSearchEngine { |
||||
|
||||
public DuckSearch() { |
||||
super("file:///android_asset/duckduckgo.png", Constants.DUCK_SEARCH, R.string.search_engine_duckduckgo); |
||||
} |
||||
|
||||
} |
@ -1,15 +0,0 @@
@@ -1,15 +0,0 @@
|
||||
package org.purplei2p.lightning.search.engine |
||||
|
||||
import org.purplei2p.lightning.R |
||||
import org.purplei2p.lightning.constant.Constants |
||||
|
||||
/**
 * The DuckDuckGo search engine.
 *
 * The icon is taken from https://duckduckgo.com/assets/logo_homepage.normal.v101.png.
 */
class DuckSearch : BaseSearchEngine(
        iconUrl = "file:///android_asset/duckduckgo.png",
        queryUrl = Constants.DUCK_SEARCH,
        titleRes = R.string.search_engine_duckduckgo
)
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
package org.purplei2p.lightning.search.engine; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.constant.Constants; |
||||
|
||||
/** |
||||
* The Legwork.I2P search engine. |
||||
* <p> |
||||
* See http://legwork.i2p/env/grafics/LegworkLogo_200.png for the icon.
|
||||
*/ |
||||
public class LegworkSearch extends BaseSearchEngine { |
||||
|
||||
public LegworkSearch() { |
||||
super("file:///android_asset/legwork.png", Constants.LEGWORK_SEARCH, R.string.search_engine_legwork); |
||||
} |
||||
|
||||
} |
@ -1,15 +0,0 @@
@@ -1,15 +0,0 @@
|
||||
package org.purplei2p.lightning.search.engine |
||||
|
||||
import org.purplei2p.lightning.R |
||||
import org.purplei2p.lightning.constant.Constants |
||||
|
||||
/**
 * The Legwork.I2P search engine.
 *
 * The icon is taken from http://legwork.i2p/env/grafics/LegworkLogo_200.png.
 */
class LegworkSearch : BaseSearchEngine(
        iconUrl = "file:///android_asset/legwork.png",
        queryUrl = Constants.LEGWORK_SEARCH,
        titleRes = R.string.search_engine_legwork
)
@ -1,165 +0,0 @@
@@ -1,165 +0,0 @@
|
||||
package org.purplei2p.lightning.search.suggestions; |
||||
|
||||
import android.app.Application; |
||||
import android.support.annotation.NonNull; |
||||
import android.support.annotation.Nullable; |
||||
import android.text.TextUtils; |
||||
import android.util.Log; |
||||
|
||||
import java.io.File; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
import java.io.UnsupportedEncodingException; |
||||
import java.net.URL; |
||||
import java.net.URLEncoder; |
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
import java.util.Locale; |
||||
import java.util.concurrent.TimeUnit; |
||||
|
||||
import org.purplei2p.lightning.database.HistoryItem; |
||||
import org.purplei2p.lightning.utils.FileUtils; |
||||
import org.purplei2p.lightning.utils.Utils; |
||||
import okhttp3.Cache; |
||||
import okhttp3.CacheControl; |
||||
import okhttp3.Interceptor; |
||||
import okhttp3.OkHttpClient; |
||||
import okhttp3.Request; |
||||
import okhttp3.Response; |
||||
import okhttp3.ResponseBody; |
||||
|
||||
/** |
||||
* The base search suggestions API. Provides common |
||||
* fetching and caching functionality for each potential |
||||
* suggestions provider. |
||||
*/ |
||||
public abstract class BaseSuggestionsModel { |
||||
|
||||
private static final String TAG = "BaseSuggestionsModel"; |
||||
|
||||
static final int MAX_RESULTS = 5; |
||||
private static final long INTERVAL_DAY = TimeUnit.DAYS.toSeconds(1); |
||||
@NonNull private static final String DEFAULT_LANGUAGE = "en"; |
||||
|
||||
@NonNull private final OkHttpClient mHttpClient; |
||||
@NonNull private final CacheControl mCacheControl; |
||||
@NonNull private final String mEncoding; |
||||
@NonNull private final String mLanguage; |
||||
|
||||
/** |
||||
* Create a URL for the given query in the given language. |
||||
* |
||||
* @param query the query that was made. |
||||
* @param language the locale of the user. |
||||
* @return should return a URL that can be fetched using a GET. |
||||
*/ |
||||
@NonNull |
||||
protected abstract String createQueryUrl(@NonNull String query, @NonNull String language); |
||||
|
||||
/** |
||||
* Parse the results of an input stream into a list of {@link HistoryItem}. |
||||
* |
||||
* @param inputStream the raw input to parse. |
||||
* @param results the list to populate. |
||||
* @throws Exception throw an exception if anything goes wrong. |
||||
*/ |
||||
protected abstract void parseResults(@NonNull InputStream inputStream, @NonNull List<HistoryItem> results) throws Exception; |
||||
|
||||
BaseSuggestionsModel(@NonNull Application application, @NonNull String encoding) { |
||||
mEncoding = encoding; |
||||
mLanguage = getLanguage(); |
||||
File suggestionsCache = new File(application.getCacheDir(), "suggestion_responses"); |
||||
mHttpClient = new OkHttpClient.Builder() |
||||
.cache(new Cache(suggestionsCache, FileUtils.megabytesToBytes(1))) |
||||
.addNetworkInterceptor(REWRITE_CACHE_CONTROL_INTERCEPTOR) |
||||
.build(); |
||||
mCacheControl = new CacheControl.Builder().maxStale(1, TimeUnit.DAYS).build(); |
||||
} |
||||
|
||||
/** |
||||
* Retrieves the results for a query. |
||||
* |
||||
* @param rawQuery the raw query to retrieve the results for. |
||||
* @return a list of history items for the query. |
||||
*/ |
||||
@NonNull |
||||
public final List<HistoryItem> fetchResults(@NonNull final String rawQuery) { |
||||
List<HistoryItem> filter = new ArrayList<>(5); |
||||
|
||||
String query; |
||||
try { |
||||
query = URLEncoder.encode(rawQuery, mEncoding); |
||||
} catch (UnsupportedEncodingException e) { |
||||
Log.e(TAG, "Unable to encode the URL", e); |
||||
|
||||
return filter; |
||||
} |
||||
|
||||
InputStream inputStream = downloadSuggestionsForQuery(query, mLanguage); |
||||
if (inputStream == null) { |
||||
// There are no suggestions for this query, return an empty list.
|
||||
return filter; |
||||
} |
||||
try { |
||||
parseResults(inputStream, filter); |
||||
} catch (Exception e) { |
||||
Log.e(TAG, "Unable to parse results", e); |
||||
} finally { |
||||
Utils.close(inputStream); |
||||
} |
||||
|
||||
return filter; |
||||
} |
||||
|
||||
/** |
||||
* This method downloads the search suggestions for the specific query. |
||||
* NOTE: This is a blocking operation, do not fetchResults on the UI thread. |
||||
* |
||||
* @param query the query to get suggestions for |
||||
* @return the cache file containing the suggestions |
||||
*/ |
||||
@Nullable |
||||
private InputStream downloadSuggestionsForQuery(@NonNull String query, @NonNull String language) { |
||||
String queryUrl = createQueryUrl(query, language); |
||||
|
||||
try { |
||||
URL url = new URL(queryUrl); |
||||
|
||||
// OkHttp automatically gzips requests
|
||||
Request suggestionsRequest = new Request.Builder().url(url) |
||||
.addHeader("Accept-Charset", mEncoding) |
||||
.cacheControl(mCacheControl) |
||||
.build(); |
||||
|
||||
Response suggestionsResponse = mHttpClient.newCall(suggestionsRequest).execute(); |
||||
|
||||
ResponseBody responseBody = suggestionsResponse.body(); |
||||
return responseBody != null ? responseBody.byteStream() : null; |
||||
} catch (IOException exception) { |
||||
Log.e(TAG, "Problem getting search suggestions", exception); |
||||
} |
||||
|
||||
return null; |
||||
} |
||||
|
||||
@NonNull |
||||
private static String getLanguage() { |
||||
String language = Locale.getDefault().getLanguage(); |
||||
if (TextUtils.isEmpty(language)) { |
||||
language = DEFAULT_LANGUAGE; |
||||
} |
||||
return language; |
||||
} |
||||
|
||||
@NonNull |
||||
private static final Interceptor REWRITE_CACHE_CONTROL_INTERCEPTOR = new Interceptor() { |
||||
@Override |
||||
public Response intercept(@NonNull Chain chain) throws IOException { |
||||
Response originalResponse = chain.proceed(chain.request()); |
||||
return originalResponse.newBuilder() |
||||
.header("cache-control", "max-age=" + INTERVAL_DAY + ", max-stale=" + INTERVAL_DAY) |
||||
.build(); |
||||
} |
||||
}; |
||||
|
||||
} |
@ -1,52 +0,0 @@
@@ -1,52 +0,0 @@
|
||||
package org.purplei2p.lightning.search.suggestions; |
||||
|
||||
import android.app.Application; |
||||
import android.support.annotation.NonNull; |
||||
|
||||
import org.json.JSONArray; |
||||
import org.json.JSONObject; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.List; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.database.HistoryItem; |
||||
import org.purplei2p.lightning.utils.FileUtils; |
||||
|
||||
/** |
||||
* The search suggestions provider for the DuckDuckGo search engine. |
||||
*/ |
||||
public final class DuckSuggestionsModel extends BaseSuggestionsModel { |
||||
|
||||
@NonNull private static final String ENCODING = "UTF-8"; |
||||
@NonNull private final String mSearchSubtitle; |
||||
|
||||
public DuckSuggestionsModel(@NonNull Application application) { |
||||
super(application, ENCODING); |
||||
mSearchSubtitle = application.getString(R.string.suggestion); |
||||
} |
||||
|
||||
@NonNull |
||||
@Override |
||||
protected String createQueryUrl(@NonNull String query, @NonNull String language) { |
||||
return "https://duckduckgo.com/ac/?q=" + query; |
||||
} |
||||
|
||||
@Override |
||||
protected void parseResults(@NonNull InputStream inputStream, @NonNull List<HistoryItem> results) throws Exception { |
||||
String content = FileUtils.readStringFromStream(inputStream, ENCODING); |
||||
JSONArray jsonArray = new JSONArray(content); |
||||
int counter = 0; |
||||
for (int n = 0, size = jsonArray.length(); n < size; n++) { |
||||
JSONObject object = jsonArray.getJSONObject(n); |
||||
String suggestion = object.getString("phrase"); |
||||
results.add(new HistoryItem(mSearchSubtitle + " \"" + suggestion + '"', |
||||
suggestion, R.drawable.ic_search)); |
||||
counter++; |
||||
if (counter >= MAX_RESULTS) { |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
} |
@ -1,51 +0,0 @@
@@ -1,51 +0,0 @@
|
||||
package org.purplei2p.lightning.search.suggestions; |
||||
|
||||
import android.app.Application; |
||||
import android.support.annotation.NonNull; |
||||
|
||||
import org.json.JSONArray; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.List; |
||||
|
||||
import org.purplei2p.lightning.R; |
||||
import org.purplei2p.lightning.database.HistoryItem; |
||||
import org.purplei2p.lightning.utils.FileUtils; |
||||
|
||||
/** |
||||
* The search suggestions provider for the DuckDuckGo search engine. |
||||
*/ |
||||
public final class LegworkSuggestionsModel extends BaseSuggestionsModel { |
||||
|
||||
@NonNull private static final String ENCODING = "UTF-8"; |
||||
@NonNull private final String mSearchSubtitle; |
||||
|
||||
public LegworkSuggestionsModel(@NonNull Application application) { |
||||
super(application, ENCODING); |
||||
mSearchSubtitle = application.getString(R.string.suggestion); |
||||
} |
||||
|
||||
@NonNull |
||||
@Override |
||||
protected String createQueryUrl(@NonNull String query, @NonNull String language) { |
||||
return "http://legwork.i2p/suggest.json?query=" + query; |
||||
} |
||||
|
||||
@Override |
||||
protected void parseResults(@NonNull InputStream inputStream, @NonNull List<HistoryItem> results) throws Exception { |
||||
String content = FileUtils.readStringFromStream(inputStream, ENCODING); |
||||
JSONArray respArray = new JSONArray(content); |
||||
JSONArray jsonArray = respArray.getJSONArray(1); |
||||
int counter = 0; |
||||
for (int n = 0, size = jsonArray.length(); n < size; n++) { |
||||
String suggestion = jsonArray.getString(n); |
||||
results.add(new HistoryItem(mSearchSubtitle + " \"" + suggestion + '"', |
||||
suggestion, R.drawable.ic_search)); |
||||
counter++; |
||||
if (counter >= MAX_RESULTS) { |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
} |
Loading…
Reference in new issue