Improved reading mode thanks to changes from snacktory fork by skyshard
This commit is contained in:
parent
04c9f75a90
commit
367c62bd39
@ -139,7 +139,7 @@ public class ReadingActivity extends AppCompatActivity {
|
|||||||
|
|
||||||
private final Activity mActivity;
|
private final Activity mActivity;
|
||||||
private String mTitleText;
|
private String mTitleText;
|
||||||
private List<String> mBodyText;
|
private String mBodyText;
|
||||||
|
|
||||||
public PageLoader(Activity activity) {
|
public PageLoader(Activity activity) {
|
||||||
mActivity = activity;
|
mActivity = activity;
|
||||||
@ -163,15 +163,15 @@ public class ReadingActivity extends AppCompatActivity {
|
|||||||
try {
|
try {
|
||||||
JResult result = fetcher.fetchAndExtract(params[0], 2500, true);
|
JResult result = fetcher.fetchAndExtract(params[0], 2500, true);
|
||||||
mTitleText = result.getTitle();
|
mTitleText = result.getTitle();
|
||||||
mBodyText = result.getTextList();
|
mBodyText = result.getText();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
mTitleText = "";
|
mTitleText = "";
|
||||||
mBodyText = new ArrayList<>();
|
mBodyText = "";
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
} catch (OutOfMemoryError e) {
|
} catch (OutOfMemoryError e) {
|
||||||
System.gc();
|
System.gc();
|
||||||
mTitleText = "";
|
mTitleText = "";
|
||||||
mBodyText = new ArrayList<>();
|
mBodyText = "";
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@ -186,11 +186,7 @@ public class ReadingActivity extends AppCompatActivity {
|
|||||||
if (mTitleText.isEmpty() || mBodyText.isEmpty()) {
|
if (mTitleText.isEmpty() || mBodyText.isEmpty()) {
|
||||||
setText(getString(R.string.untitled), getString(R.string.loading_failed));
|
setText(getString(R.string.untitled), getString(R.string.loading_failed));
|
||||||
} else {
|
} else {
|
||||||
StringBuilder builder = new StringBuilder();
|
setText(mTitleText, mBodyText);
|
||||||
for (String text : mBodyText) {
|
|
||||||
builder.append(text).append("\n\n");
|
|
||||||
}
|
|
||||||
setText(mTitleText, builder.toString());
|
|
||||||
}
|
}
|
||||||
super.onPostExecute(result);
|
super.onPostExecute(result);
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -22,19 +22,16 @@ import java.io.FileWriter;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.net.Proxy;
|
import java.net.Proxy;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
import java.util.zip.Inflater;
|
import java.util.zip.Inflater;
|
||||||
import java.util.zip.InflaterInputStream;
|
import java.util.zip.InflaterInputStream;
|
||||||
|
|
||||||
import acr.browser.lightning.constant.Constants;
|
|
||||||
import android.util.Log;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class to fetch articles. This class is thread safe.
|
* Class to fetch articles. This class is thread safe.
|
||||||
*
|
*
|
||||||
@ -64,7 +61,7 @@ public class HtmlFetcher {
|
|||||||
else
|
else
|
||||||
existing.add(domainStr);
|
existing.add(domainStr);
|
||||||
|
|
||||||
String html = new HtmlFetcher().fetchAsString(url, 20000);
|
String html = new HtmlFetcher().fetchAsString(url, 2000);
|
||||||
String outFile = domainStr + counterStr + ".html";
|
String outFile = domainStr + counterStr + ".html";
|
||||||
BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
|
BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
|
||||||
writer.write(html);
|
writer.write(html);
|
||||||
@ -73,8 +70,8 @@ public class HtmlFetcher {
|
|||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String referrer = "https://github.com/karussell/snacktory";
|
private String referrer = "http://jetsli.de/crawler";
|
||||||
private String userAgent = "Mozilla/5.0 (compatible; Snacktory; +" + referrer + ')';
|
private String userAgent = "Mozilla/5.0 (compatible; Jetslide; +" + referrer + ')';
|
||||||
private String cacheControl = "max-age=0";
|
private String cacheControl = "max-age=0";
|
||||||
private String language = "en-us";
|
private String language = "en-us";
|
||||||
private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
|
private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
|
||||||
@ -83,7 +80,7 @@ public class HtmlFetcher {
|
|||||||
private final AtomicInteger cacheCounter = new AtomicInteger(0);
|
private final AtomicInteger cacheCounter = new AtomicInteger(0);
|
||||||
private int maxTextLength = -1;
|
private int maxTextLength = -1;
|
||||||
private ArticleTextExtractor extractor = new ArticleTextExtractor();
|
private ArticleTextExtractor extractor = new ArticleTextExtractor();
|
||||||
private final Set<String> furtherResolveNecessary = new LinkedHashSet<String>() {
|
private Set<String> furtherResolveNecessary = new LinkedHashSet<String>() {
|
||||||
{
|
{
|
||||||
add("bit.ly");
|
add("bit.ly");
|
||||||
add("cli.gs");
|
add("cli.gs");
|
||||||
@ -202,6 +199,12 @@ public class HtmlFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
|
public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
|
||||||
|
return fetchAndExtract(url, timeout, resolve, 0, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// main workhorse to call externally
|
||||||
|
public JResult fetchAndExtract(String url, int timeout, boolean resolve,
|
||||||
|
int maxContentSize, boolean forceReload) throws Exception {
|
||||||
String originalUrl = url;
|
String originalUrl = url;
|
||||||
url = SHelper.removeHashbang(url);
|
url = SHelper.removeHashbang(url);
|
||||||
String gUrl = SHelper.getUrlFromUglyGoogleRedirect(url);
|
String gUrl = SHelper.getUrlFromUglyGoogleRedirect(url);
|
||||||
@ -219,9 +222,8 @@ public class HtmlFetcher {
|
|||||||
if (res != null)
|
if (res != null)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
String resUrl = getResolvedUrl(url, timeout);
|
String resUrl = getResolvedUrl(url, timeout, 0);
|
||||||
if (resUrl.isEmpty()) {
|
if (resUrl.isEmpty()) {
|
||||||
Log.d(Constants.TAG, "resolved url is empty. Url is: " + url);
|
|
||||||
|
|
||||||
JResult result = new JResult();
|
JResult result = new JResult();
|
||||||
if (cache != null)
|
if (cache != null)
|
||||||
@ -229,10 +231,9 @@ public class HtmlFetcher {
|
|||||||
return result.setUrl(url);
|
return result.setUrl(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if resolved url is longer then use it!
|
// if resolved url is different then use it!
|
||||||
if (resUrl.trim().length() > url.length()) {
|
if (!resUrl.equals(url)) {
|
||||||
// this is necessary e.g. for some homebaken url resolvers which
|
// this is necessary e.g. for some homebaken url resolvers which return
|
||||||
// return
|
|
||||||
// the resolved url relative to url!
|
// the resolved url relative to url!
|
||||||
url = SHelper.useDomainOfFirstArg4Second(url, resUrl);
|
url = SHelper.useDomainOfFirstArg4Second(url, resUrl);
|
||||||
}
|
}
|
||||||
@ -244,20 +245,18 @@ public class HtmlFetcher {
|
|||||||
return res;
|
return res;
|
||||||
|
|
||||||
JResult result = new JResult();
|
JResult result = new JResult();
|
||||||
// or should we use? <link rel="canonical"
|
// or should we use? <link rel="canonical" href="http://www.N24.de/news/newsitem_6797232.html"/>
|
||||||
// href="http://www.N24.de/news/newsitem_6797232.html"/>
|
|
||||||
result.setUrl(url);
|
result.setUrl(url);
|
||||||
result.setOriginalUrl(originalUrl);
|
result.setOriginalUrl(originalUrl);
|
||||||
result.setDate(SHelper.estimateDate(url));
|
|
||||||
|
|
||||||
// Immediately put the url into the cache as extracting content takes
|
// Immediately put the url into the cache as extracting content takes time.
|
||||||
// time.
|
|
||||||
if (cache != null) {
|
if (cache != null) {
|
||||||
cache.put(originalUrl, result);
|
cache.put(originalUrl, result);
|
||||||
cache.put(url, result);
|
cache.put(url, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
String lowerUrl = url.toLowerCase(Locale.getDefault());
|
// extract content to the extent appropriate for content type
|
||||||
|
String lowerUrl = url.toLowerCase();
|
||||||
if (SHelper.isDoc(lowerUrl) || SHelper.isApp(lowerUrl) || SHelper.isPackage(lowerUrl)) {
|
if (SHelper.isDoc(lowerUrl) || SHelper.isApp(lowerUrl) || SHelper.isPackage(lowerUrl)) {
|
||||||
// skip
|
// skip
|
||||||
} else if (SHelper.isVideo(lowerUrl) || SHelper.isAudio(lowerUrl)) {
|
} else if (SHelper.isVideo(lowerUrl) || SHelper.isAudio(lowerUrl)) {
|
||||||
@ -265,16 +264,30 @@ public class HtmlFetcher {
|
|||||||
} else if (SHelper.isImage(lowerUrl)) {
|
} else if (SHelper.isImage(lowerUrl)) {
|
||||||
result.setImageUrl(url);
|
result.setImageUrl(url);
|
||||||
} else {
|
} else {
|
||||||
extractor.extractContent(result, fetchAsString(url, timeout));
|
try {
|
||||||
|
String urlToDownload = url;
|
||||||
|
if (forceReload) {
|
||||||
|
urlToDownload = getURLtoBreakCache(url);
|
||||||
|
}
|
||||||
|
extractor.extractContent(result, fetchAsString(urlToDownload, timeout), maxContentSize);
|
||||||
|
} catch (IOException io) {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
if (result.getFaviconUrl().isEmpty())
|
if (result.getFaviconUrl().isEmpty())
|
||||||
result.setFaviconUrl(SHelper.getDefaultFavicon(url));
|
result.setFaviconUrl(SHelper.getDefaultFavicon(url));
|
||||||
|
|
||||||
// some links are relative to root and do not include the domain of
|
// some links are relative to root and do not include the domain of the url :(
|
||||||
// the url :(
|
if (!result.getFaviconUrl().isEmpty())
|
||||||
result.setFaviconUrl(fixUrl(url, result.getFaviconUrl()));
|
result.setFaviconUrl(fixUrl(url, result.getFaviconUrl()));
|
||||||
result.setImageUrl(fixUrl(url, result.getImageUrl()));
|
|
||||||
result.setVideoUrl(fixUrl(url, result.getVideoUrl()));
|
if (!result.getImageUrl().isEmpty())
|
||||||
result.setRssUrl(fixUrl(url, result.getRssUrl()));
|
result.setImageUrl(fixUrl(url, result.getImageUrl()));
|
||||||
|
|
||||||
|
if (!result.getVideoUrl().isEmpty())
|
||||||
|
result.setVideoUrl(fixUrl(url, result.getVideoUrl()));
|
||||||
|
|
||||||
|
if (!result.getRssUrl().isEmpty())
|
||||||
|
result.setRssUrl(fixUrl(url, result.getRssUrl()));
|
||||||
}
|
}
|
||||||
result.setText(lessText(result.getText()));
|
result.setText(lessText(result.getText()));
|
||||||
synchronized (result) {
|
synchronized (result) {
|
||||||
@ -283,6 +296,20 @@ public class HtmlFetcher {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ugly hack to break free from any cached versions, a few URLs required this.
|
||||||
|
public static String getURLtoBreakCache(String url) {
|
||||||
|
try {
|
||||||
|
URL aURL = new URL(url);
|
||||||
|
if (aURL.getQuery() != null && aURL.getQuery().isEmpty()) {
|
||||||
|
return url + "?1";
|
||||||
|
} else {
|
||||||
|
return url + "&1";
|
||||||
|
}
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String lessText(String text) {
|
public String lessText(String text) {
|
||||||
if (text == null)
|
if (text == null)
|
||||||
return "";
|
return "";
|
||||||
@ -297,13 +324,14 @@ public class HtmlFetcher {
|
|||||||
return SHelper.useDomainOfFirstArg4Second(url, urlOrPath);
|
return SHelper.useDomainOfFirstArg4Second(url, urlOrPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String fetchAsString(String urlAsString, int timeout) throws
|
public String fetchAsString(String urlAsString, int timeout)
|
||||||
IOException {
|
throws MalformedURLException, IOException {
|
||||||
return fetchAsString(urlAsString, timeout, true);
|
return fetchAsString(urlAsString, timeout, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// main routine to get raw webpage content
|
||||||
public String fetchAsString(String urlAsString, int timeout, boolean includeSomeGooseOptions)
|
public String fetchAsString(String urlAsString, int timeout, boolean includeSomeGooseOptions)
|
||||||
throws IOException {
|
throws MalformedURLException, IOException {
|
||||||
HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, includeSomeGooseOptions);
|
HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, includeSomeGooseOptions);
|
||||||
hConn.setInstanceFollowRedirects(true);
|
hConn.setInstanceFollowRedirects(true);
|
||||||
String encoding = hConn.getContentEncoding();
|
String encoding = hConn.getContentEncoding();
|
||||||
@ -317,27 +345,23 @@ public class HtmlFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String enc = Converter.extractEncoding(hConn.getContentType());
|
String enc = Converter.extractEncoding(hConn.getContentType());
|
||||||
String res = createConverter(urlAsString).streamToString(is, enc);
|
return createConverter(urlAsString).streamToString(is, enc);
|
||||||
Log.d(Constants.TAG, res.length() + " FetchAsString:" + urlAsString);
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Converter createConverter(String url) {
|
public static Converter createConverter(String url) {
|
||||||
return new Converter(url);
|
return new Converter(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* On some devices we have to hack:
|
* On some devices we have to hack:
|
||||||
* http://developers.sun.com/mobility/reference
|
* http://developers.sun.com/mobility/reference/techart/design_guidelines/http_redirection.html
|
||||||
* /techart/design_guidelines/http_redirection.html
|
|
||||||
*
|
*
|
||||||
* @param timeout
|
* @param timeout Sets a specified timeout value, in milliseconds
|
||||||
* Sets a specified timeout value, in milliseconds
|
|
||||||
* @return the resolved url if any. Or null if it couldn't resolve the url
|
* @return the resolved url if any. Or null if it couldn't resolve the url
|
||||||
* (within the specified time) or the same url if response code is
|
* (within the specified time) or the same url if response code is OK
|
||||||
* OK
|
|
||||||
*/
|
*/
|
||||||
public String getResolvedUrl(String urlAsString, int timeout) {
|
public String getResolvedUrl(String urlAsString, int timeout,
|
||||||
|
int num_redirects) {
|
||||||
String newUrl = null;
|
String newUrl = null;
|
||||||
int responseCode = -1;
|
int responseCode = -1;
|
||||||
try {
|
try {
|
||||||
@ -354,28 +378,32 @@ public class HtmlFetcher {
|
|||||||
return urlAsString;
|
return urlAsString;
|
||||||
|
|
||||||
newUrl = hConn.getHeaderField("Location");
|
newUrl = hConn.getHeaderField("Location");
|
||||||
if (responseCode / 100 == 3 && newUrl != null) {
|
// Note that the max recursion level is 5.
|
||||||
|
if (responseCode / 100 == 3 && newUrl != null && num_redirects < 5) {
|
||||||
newUrl = newUrl.replaceAll(" ", "+");
|
newUrl = newUrl.replaceAll(" ", "+");
|
||||||
// some services use (none-standard) utf8 in their location
|
// some services use (none-standard) utf8 in their location header
|
||||||
// header
|
|
||||||
if (urlAsString.startsWith("http://bit.ly")
|
if (urlAsString.startsWith("http://bit.ly")
|
||||||
|| urlAsString.startsWith("http://is.gd"))
|
|| urlAsString.startsWith("http://is.gd"))
|
||||||
newUrl = encodeUriFromHeader(newUrl);
|
newUrl = encodeUriFromHeader(newUrl);
|
||||||
|
|
||||||
// fix problems if shortened twice. as it is often the case
|
// AP: This code is no longer needed; instead we always follow
|
||||||
// after twitters' t.co bullshit
|
// multiple redirects.
|
||||||
if (furtherResolveNecessary.contains(SHelper.extractDomain(newUrl, true)))
|
//
|
||||||
newUrl = getResolvedUrl(newUrl, timeout);
|
// fix problems if shortened twice. as it is often the case after twitters' t.co bullshit
|
||||||
|
//if (furtherResolveNecessary.contains(SHelper.extractDomain(newUrl, true)))
|
||||||
|
// newUrl = getResolvedUrl(newUrl, timeout);
|
||||||
|
|
||||||
|
// Add support for URLs with multiple levels of redirection,
|
||||||
|
// call getResolvedUrl until there is no more redirects or a
|
||||||
|
// max number of redirects is reached.
|
||||||
|
newUrl = SHelper.useDomainOfFirstArg4Second(urlAsString, newUrl);
|
||||||
|
newUrl = getResolvedUrl(newUrl, timeout, num_redirects + 1);
|
||||||
return newUrl;
|
return newUrl;
|
||||||
} else
|
} else
|
||||||
return urlAsString;
|
return urlAsString;
|
||||||
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
Log.e(Constants.TAG, "getResolvedUrl:" + urlAsString + " Error:" + ex.getMessage());
|
|
||||||
return "";
|
return "";
|
||||||
} finally {
|
|
||||||
Log.e(Constants.TAG, responseCode + " url:" + urlAsString + " resolved:" + newUrl);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -400,9 +428,9 @@ public class HtmlFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected HttpURLConnection createUrlConnection(String urlAsStr, int timeout,
|
protected HttpURLConnection createUrlConnection(String urlAsStr, int timeout,
|
||||||
boolean includeSomeGooseOptions) throws IOException {
|
boolean includeSomeGooseOptions) throws MalformedURLException, IOException {
|
||||||
URL url = new URL(urlAsStr);
|
URL url = new URL(urlAsStr);
|
||||||
// using proxy may increase latency
|
//using proxy may increase latency
|
||||||
HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
|
HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
|
||||||
hConn.setRequestProperty("User-Agent", userAgent);
|
hConn.setRequestProperty("User-Agent", userAgent);
|
||||||
hConn.setRequestProperty("Accept", accept);
|
hConn.setRequestProperty("Accept", accept);
|
||||||
@ -415,8 +443,7 @@ public class HtmlFetcher {
|
|||||||
hConn.setRequestProperty("Cache-Control", cacheControl);
|
hConn.setRequestProperty("Cache-Control", cacheControl);
|
||||||
}
|
}
|
||||||
|
|
||||||
// suggest respond to be gzipped or deflated (which is just another
|
// suggest respond to be gzipped or deflated (which is just another compression)
|
||||||
// compression)
|
|
||||||
// http://stackoverflow.com/q/3932117
|
// http://stackoverflow.com/q/3932117
|
||||||
hConn.setRequestProperty("Accept-Encoding", "gzip, deflate");
|
hConn.setRequestProperty("Accept-Encoding", "gzip, deflate");
|
||||||
hConn.setConnectTimeout(timeout);
|
hConn.setConnectTimeout(timeout);
|
||||||
@ -424,14 +451,12 @@ public class HtmlFetcher {
|
|||||||
return hConn;
|
return hConn;
|
||||||
}
|
}
|
||||||
|
|
||||||
private JResult getFromCache(String url, String originalUrl) throws Exception {
|
private JResult getFromCache(String url, String originalUrl) {
|
||||||
if (cache != null) {
|
if (cache != null) {
|
||||||
JResult res = cache.get(url);
|
JResult res = cache.get(url);
|
||||||
if (res != null) {
|
if (res != null) {
|
||||||
// e.g. the cache returned a shortened url as original url now
|
// e.g. the cache returned a shortened url as original url now we want to store the
|
||||||
// we want to store the
|
// current original url! Also it can be that the cache response to url but the JResult
|
||||||
// current original url! Also it can be that the cache response
|
|
||||||
// to url but the JResult
|
|
||||||
// does not contain it so overwrite it:
|
// does not contain it so overwrite it:
|
||||||
res.setUrl(url);
|
res.setUrl(url);
|
||||||
res.setOriginalUrl(originalUrl);
|
res.setOriginalUrl(originalUrl);
|
||||||
|
@ -16,10 +16,14 @@
|
|||||||
package acr.browser.lightning.reading;
|
package acr.browser.lightning.reading;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parsed result from web page containing important title, text and image.
|
* Parsed result from web page containing important title, text and image.
|
||||||
@ -38,10 +42,15 @@ public class JResult implements Serializable {
|
|||||||
private String text;
|
private String text;
|
||||||
private String faviconUrl;
|
private String faviconUrl;
|
||||||
private String description;
|
private String description;
|
||||||
private String dateString;
|
private String authorName;
|
||||||
private List<String> textList;
|
private String authorDescription;
|
||||||
|
private Date date;
|
||||||
private Collection<String> keywords;
|
private Collection<String> keywords;
|
||||||
private List<ImageResult> images = null;
|
private List<ImageResult> images = null;
|
||||||
|
private List<Map<String, String>> links = new ArrayList<>();
|
||||||
|
private String type;
|
||||||
|
private String sitename;
|
||||||
|
private String language;
|
||||||
|
|
||||||
public JResult() {
|
public JResult() {
|
||||||
}
|
}
|
||||||
@ -108,6 +117,28 @@ public class JResult implements Serializable {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getAuthorName() {
|
||||||
|
if (authorName == null)
|
||||||
|
return "";
|
||||||
|
return authorName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public JResult setAuthorName(String authorName) {
|
||||||
|
this.authorName = authorName;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAuthorDescription() {
|
||||||
|
if (authorDescription == null)
|
||||||
|
return "";
|
||||||
|
return authorDescription;
|
||||||
|
}
|
||||||
|
|
||||||
|
public JResult setAuthorDescription(String authorDescription) {
|
||||||
|
this.authorDescription = authorDescription;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public String getImageUrl() {
|
public String getImageUrl() {
|
||||||
if (imageUrl == null)
|
if (imageUrl == null)
|
||||||
return "";
|
return "";
|
||||||
@ -131,17 +162,6 @@ public class JResult implements Serializable {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getTextList() {
|
|
||||||
if (this.textList == null)
|
|
||||||
return new ArrayList<>();
|
|
||||||
return this.textList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public JResult setTextList(List<String> textList) {
|
|
||||||
this.textList = textList;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTitle() {
|
public String getTitle() {
|
||||||
if (title == null)
|
if (title == null)
|
||||||
return "";
|
return "";
|
||||||
@ -164,8 +184,8 @@ public class JResult implements Serializable {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public JResult setDate(String date) {
|
public JResult setDate(Date date) {
|
||||||
this.dateString = date;
|
this.date = date;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,8 +200,8 @@ public class JResult implements Serializable {
|
|||||||
/**
|
/**
|
||||||
* @return get date from url or guessed from text
|
* @return get date from url or guessed from text
|
||||||
*/
|
*/
|
||||||
public String getDate() {
|
public Date getDate() {
|
||||||
return dateString;
|
return date;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -209,6 +229,44 @@ public class JResult implements Serializable {
|
|||||||
this.images = images;
|
this.images = images;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addLink(String url, String text, Integer pos) {
|
||||||
|
Map link = new HashMap();
|
||||||
|
link.put("url", url);
|
||||||
|
link.put("text", text);
|
||||||
|
link.put("offset", String.valueOf(pos));
|
||||||
|
links.add(link);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Map<String, String>> getLinks() {
|
||||||
|
if (links == null)
|
||||||
|
return Collections.emptyList();
|
||||||
|
return links;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setType(String type) {
|
||||||
|
this.type = type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSitename() {
|
||||||
|
return sitename;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSitename(String sitename) {
|
||||||
|
this.sitename = sitename;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "title:" + getTitle() + " imageUrl:" + getImageUrl() + " text:" + text;
|
return "title:" + getTitle() + " imageUrl:" + getImageUrl() + " text:" + text;
|
||||||
|
@ -4,40 +4,46 @@ import org.jsoup.Jsoup;
|
|||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.jsoup.nodes.Node;
|
import org.jsoup.nodes.Node;
|
||||||
import org.jsoup.nodes.TextNode;
|
import org.jsoup.nodes.TextNode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author goose | jim
|
* @author goose | jim
|
||||||
* @author karussell
|
* @author karussell
|
||||||
*
|
* <p/>
|
||||||
* this class will be responsible for taking our top node and stripping
|
* this class will be responsible for taking our top node and stripping out junk
|
||||||
* out junk we don't want and getting it ready for how we want it
|
* we don't want and getting it ready for how we want it presented to the user
|
||||||
* presented to the user
|
|
||||||
*/
|
*/
|
||||||
public class OutputFormatter {
|
public class OutputFormatter {
|
||||||
|
|
||||||
public static final int MIN_PARAGRAPH_TEXT = 50;
|
private static final int MIN_FIRST_PARAGRAPH_TEXT = 50; // Min size of first paragraph
|
||||||
|
private static final int MIN_PARAGRAPH_TEXT = 30; // Min size of any other paragraphs
|
||||||
private static final List<String> NODES_TO_REPLACE = Arrays.asList("strong", "b", "i");
|
private static final List<String> NODES_TO_REPLACE = Arrays.asList("strong", "b", "i");
|
||||||
private Pattern unlikelyPattern = Pattern.compile("display\\:none|visibility\\:hidden");
|
private Pattern unlikelyPattern = Pattern.compile("display\\:none|visibility\\:hidden");
|
||||||
protected final int minParagraphText;
|
private final int minFirstParagraphText;
|
||||||
protected final List<String> nodesToReplace;
|
private final int minParagraphText;
|
||||||
protected String nodesToKeepCssSelector = "p";
|
private final List<String> nodesToReplace;
|
||||||
|
private String nodesToKeepCssSelector = "p, ol";
|
||||||
|
|
||||||
public OutputFormatter() {
|
public OutputFormatter() {
|
||||||
this(MIN_PARAGRAPH_TEXT, NODES_TO_REPLACE);
|
this(MIN_FIRST_PARAGRAPH_TEXT, MIN_PARAGRAPH_TEXT, NODES_TO_REPLACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public OutputFormatter(int minParagraphText) {
|
public OutputFormatter(int minParagraphText) {
|
||||||
this(minParagraphText, NODES_TO_REPLACE);
|
this(minParagraphText, minParagraphText, NODES_TO_REPLACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public OutputFormatter(int minParagraphText, List<String> nodesToReplace) {
|
public OutputFormatter(int minFirstParagraphText, int minParagraphText) {
|
||||||
|
this(minFirstParagraphText, minParagraphText, NODES_TO_REPLACE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public OutputFormatter(int minFirstParagraphText, int minParagraphText,
|
||||||
|
List<String> nodesToReplace) {
|
||||||
|
this.minFirstParagraphText = minFirstParagraphText;
|
||||||
this.minParagraphText = minParagraphText;
|
this.minParagraphText = minParagraphText;
|
||||||
this.nodesToReplace = nodesToReplace;
|
this.nodesToReplace = nodesToReplace;
|
||||||
}
|
}
|
||||||
@ -53,36 +59,34 @@ public class OutputFormatter {
|
|||||||
* takes an element and turns the P tags into \n\n
|
* takes an element and turns the P tags into \n\n
|
||||||
*/
|
*/
|
||||||
public String getFormattedText(Element topNode) {
|
public String getFormattedText(Element topNode) {
|
||||||
|
setParagraphIndex(topNode, nodesToKeepCssSelector);
|
||||||
removeNodesWithNegativeScores(topNode);
|
removeNodesWithNegativeScores(topNode);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
append(topNode, sb, nodesToKeepCssSelector);
|
int countOfP = append(topNode, sb, nodesToKeepCssSelector);
|
||||||
String str = SHelper.innerTrim(sb.toString());
|
String str = SHelper.innerTrim(sb.toString());
|
||||||
if (str.length() > 100)
|
|
||||||
|
int topNodeLength = topNode.text().length();
|
||||||
|
if (topNodeLength == 0) {
|
||||||
|
topNodeLength = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
boolean lowTextRatio = ((str.length() / (topNodeLength * 1.0)) < 0.25);
|
||||||
|
if (str.length() > 100 && countOfP > 0 && !lowTextRatio)
|
||||||
return str;
|
return str;
|
||||||
|
|
||||||
// no subelements
|
// no subelements
|
||||||
if (str.isEmpty() || !topNode.text().isEmpty()
|
if (str.isEmpty() || (!topNode.text().isEmpty()
|
||||||
&& str.length() <= topNode.ownText().length())
|
&& str.length() <= topNode.ownText().length())
|
||||||
|
|| countOfP == 0 || lowTextRatio) {
|
||||||
str = topNode.text();
|
str = topNode.text();
|
||||||
|
}
|
||||||
|
|
||||||
// if jsoup failed to parse the whole html now parse this smaller
|
// if jsoup failed to parse the whole html now parse this smaller
|
||||||
// snippet again to avoid html tags disturbing our text:
|
// snippet again to avoid html tags disturbing our text:
|
||||||
return Jsoup.parse(str).text();
|
return Jsoup.parse(str).text();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes an element and returns a list of texts extracted from the P tags
|
|
||||||
*/
|
|
||||||
public List<String> getTextList(Element topNode) {
|
|
||||||
List<String> texts = new ArrayList<>();
|
|
||||||
for (Element element : topNode.select(this.nodesToKeepCssSelector)) {
|
|
||||||
if (element.hasText()) {
|
|
||||||
texts.add(element.text());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return texts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If there are elements inside our top node that have a negative gravity
|
* If there are elements inside our top node that have a negative gravity
|
||||||
* score remove them
|
* score remove them
|
||||||
@ -90,15 +94,20 @@ public class OutputFormatter {
|
|||||||
protected void removeNodesWithNegativeScores(Element topNode) {
|
protected void removeNodesWithNegativeScores(Element topNode) {
|
||||||
Elements gravityItems = topNode.select("*[gravityScore]");
|
Elements gravityItems = topNode.select("*[gravityScore]");
|
||||||
for (Element item : gravityItems) {
|
for (Element item : gravityItems) {
|
||||||
int score = Integer.parseInt(item.attr("gravityScore"));
|
int score = getScore(item);
|
||||||
if (score < 0 || item.text().length() < minParagraphText)
|
int paragraphIndex = getParagraphIndex(item);
|
||||||
|
if (score < 0 || item.text().length() < getMinParagraph(paragraphIndex)) {
|
||||||
item.remove();
|
item.remove();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void append(Element node, StringBuilder sb, String tagName) {
|
protected int append(Element node, StringBuilder sb, String tagName) {
|
||||||
|
int countOfP = 0; // Number of P elements in the article
|
||||||
|
int paragraphWithTextIndex = 0;
|
||||||
// is select more costly then getElementsByTag?
|
// is select more costly then getElementsByTag?
|
||||||
MAIN: for (Element e : node.select(tagName)) {
|
MAIN:
|
||||||
|
for (Element e : node.select(tagName)) {
|
||||||
Element tmpEl = e;
|
Element tmpEl = e;
|
||||||
// check all elements until 'node'
|
// check all elements until 'node'
|
||||||
while (tmpEl != null && !tmpEl.equals(node)) {
|
while (tmpEl != null && !tmpEl.equals(node)) {
|
||||||
@ -108,18 +117,56 @@ public class OutputFormatter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String text = node2Text(e);
|
String text = node2Text(e);
|
||||||
if (text.isEmpty() || text.length() < minParagraphText
|
if (text.isEmpty() || text.length() < getMinParagraph(paragraphWithTextIndex)
|
||||||
|| text.length() > SHelper.countLetters(text) * 2)
|
|| text.length() > SHelper.countLetters(text) * 2) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (e.tagName().equals("p")) {
|
||||||
|
countOfP++;
|
||||||
|
}
|
||||||
|
|
||||||
sb.append(text);
|
sb.append(text);
|
||||||
sb.append("\n\n");
|
sb.append("\n\n");
|
||||||
|
paragraphWithTextIndex += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return countOfP;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void setParagraphIndex(Element node, String tagName) {
|
||||||
|
int paragraphIndex = 0;
|
||||||
|
for (Element e : node.select(tagName)) {
|
||||||
|
e.attr("paragraphIndex", Integer.toString(paragraphIndex++));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int getMinParagraph(int paragraphIndex) {
|
||||||
|
if (paragraphIndex < 1) {
|
||||||
|
return minFirstParagraphText;
|
||||||
|
} else {
|
||||||
|
return minParagraphText;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int getParagraphIndex(Element el) {
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(el.attr("paragraphIndex"));
|
||||||
|
} catch (NumberFormatException ex) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int getScore(Element el) {
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(el.attr("gravityScore"));
|
||||||
|
} catch (Exception ex) {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean unlikely(Node e) {
|
boolean unlikely(Node e) {
|
||||||
if (e.attr("class") != null
|
if (e.attr("class") != null && e.attr("class").toLowerCase().contains("caption"))
|
||||||
&& e.attr("class").toLowerCase(Locale.getDefault()).contains("caption"))
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
String style = e.attr("style");
|
String style = e.attr("style");
|
||||||
@ -127,36 +174,34 @@ public class OutputFormatter {
|
|||||||
return unlikelyPattern.matcher(style).find() || unlikelyPattern.matcher(clazz).find();
|
return unlikelyPattern.matcher(style).find() || unlikelyPattern.matcher(clazz).find();
|
||||||
}
|
}
|
||||||
|
|
||||||
void appendTextSkipHidden(Element e, StringBuilder accum) {
|
void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
|
||||||
for (Node child : e.childNodes()) {
|
for (Node child : e.childNodes()) {
|
||||||
if (unlikely(child))
|
if (unlikely(child)) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
if (child instanceof TextNode) {
|
if (child instanceof TextNode) {
|
||||||
TextNode textNode = (TextNode) child;
|
TextNode textNode = (TextNode) child;
|
||||||
String txt = textNode.text();
|
String txt = textNode.text();
|
||||||
accum.append(txt);
|
accum.append(txt);
|
||||||
} else if (child instanceof Element) {
|
} else if (child instanceof Element) {
|
||||||
Element element = (Element) child;
|
Element element = (Element) child;
|
||||||
if (accum.length() > 0 && element.isBlock() && !lastCharIsWhitespace(accum))
|
if (accum.length() > 0 && element.isBlock()
|
||||||
|
&& !lastCharIsWhitespace(accum))
|
||||||
accum.append(' ');
|
accum.append(' ');
|
||||||
else if (element.tagName().equals("br"))
|
else if (element.tagName().equals("br"))
|
||||||
accum.append(' ');
|
accum.append(' ');
|
||||||
appendTextSkipHidden(element, accum);
|
appendTextSkipHidden(element, accum, indent + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean lastCharIsWhitespace(StringBuilder accum) {
|
static boolean lastCharIsWhitespace(StringBuilder accum) {
|
||||||
return (accum.length() != 0) && Character.isWhitespace(accum.charAt(accum.length() - 1));
|
return accum.length() != 0 && Character.isWhitespace(accum.charAt(accum.length() - 1));
|
||||||
}
|
|
||||||
|
|
||||||
protected String node2TextOld(Element el) {
|
|
||||||
return el.text();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String node2Text(Element el) {
|
protected String node2Text(Element el) {
|
||||||
StringBuilder sb = new StringBuilder(200);
|
StringBuilder sb = new StringBuilder(200);
|
||||||
appendTextSkipHidden(el, sb);
|
appendTextSkipHidden(el, sb, 0);
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,17 +15,19 @@
|
|||||||
*/
|
*/
|
||||||
package acr.browser.lightning.reading;
|
package acr.browser.lightning.reading;
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.CookieHandler;
|
import java.net.CookieHandler;
|
||||||
import java.net.CookieManager;
|
import java.net.CookieManager;
|
||||||
import java.net.CookiePolicy;
|
import java.net.CookiePolicy;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
import java.net.URLDecoder;
|
import java.net.URLDecoder;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.security.SecureRandom;
|
import java.security.SecureRandom;
|
||||||
import java.security.cert.CertificateException;
|
import java.security.cert.CertificateException;
|
||||||
import java.security.cert.X509Certificate;
|
import java.security.cert.X509Certificate;
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@ -33,10 +35,8 @@ import javax.net.ssl.KeyManager;
|
|||||||
import javax.net.ssl.SSLContext;
|
import javax.net.ssl.SSLContext;
|
||||||
import javax.net.ssl.TrustManager;
|
import javax.net.ssl.TrustManager;
|
||||||
import javax.net.ssl.X509TrustManager;
|
import javax.net.ssl.X509TrustManager;
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @author Peter Karich
|
* @author Peter Karich
|
||||||
*/
|
*/
|
||||||
public class SHelper {
|
public class SHelper {
|
||||||
@ -127,8 +127,7 @@ public class SHelper {
|
|||||||
return null;
|
return null;
|
||||||
|
|
||||||
// dynamic programming => save already identical length into array
|
// dynamic programming => save already identical length into array
|
||||||
// to understand this algo simply print identical length in every entry
|
// to understand this algo simply print identical length in every entry of the array
|
||||||
// of the array
|
|
||||||
// i+1, j+1 then reuses information from i,j
|
// i+1, j+1 then reuses information from i,j
|
||||||
// java initializes them already with 0
|
// java initializes them already with 0
|
||||||
int[][] num = new int[str1.length()][str2.length()];
|
int[][] num = new int[str1.length()][str2.length()];
|
||||||
@ -152,7 +151,7 @@ public class SHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new int[] { lastSubstrBegin, endIndex };
|
return new int[]{lastSubstrBegin, endIndex};
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getDefaultFavicon(String url) {
|
public static String getDefaultFavicon(String url) {
|
||||||
@ -160,35 +159,19 @@ public class SHelper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param urlForDomain
|
* @param urlForDomain extract the domain from this url
|
||||||
* extract the domain from this url
|
* @param path this url does not have a domain
|
||||||
* @param path
|
* @return
|
||||||
* this url does not have a domain
|
|
||||||
* @return returns the domain
|
|
||||||
*/
|
*/
|
||||||
public static String useDomainOfFirstArg4Second(String urlForDomain, String path) {
|
public static String useDomainOfFirstArg4Second(String urlForDomain, String path) {
|
||||||
if (path.startsWith("http"))
|
try {
|
||||||
|
// See: http://stackoverflow.com/questions/1389184/building-an-absolute-url-from-a-relative-url-in-java
|
||||||
|
URL baseUrl = new URL(urlForDomain);
|
||||||
|
URL relativeurl = new URL(baseUrl, path);
|
||||||
|
return relativeurl.toString();
|
||||||
|
} catch (MalformedURLException ex) {
|
||||||
return path;
|
return path;
|
||||||
|
|
||||||
if ("favicon.ico".equals(path))
|
|
||||||
path = "/favicon.ico";
|
|
||||||
|
|
||||||
if (path.startsWith("//")) {
|
|
||||||
// wikipedia special case, see tests
|
|
||||||
if (urlForDomain.startsWith("https:"))
|
|
||||||
return "https:" + path;
|
|
||||||
|
|
||||||
return "http:" + path;
|
|
||||||
} else if (path.startsWith("/"))
|
|
||||||
return "http://" + extractHost(urlForDomain) + path;
|
|
||||||
else if (path.startsWith("../")) {
|
|
||||||
int slashIndex = urlForDomain.lastIndexOf("/");
|
|
||||||
if (slashIndex > 0 && slashIndex + 1 < urlForDomain.length())
|
|
||||||
urlForDomain = urlForDomain.substring(0, slashIndex + 1);
|
|
||||||
|
|
||||||
return urlForDomain + path;
|
|
||||||
}
|
}
|
||||||
return path;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String extractHost(String url) {
|
public static String extractHost(String url) {
|
||||||
@ -224,14 +207,12 @@ public class SHelper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isVideo(String url) {
|
public static boolean isVideo(String url) {
|
||||||
return url.endsWith(".mpeg") || url.endsWith(".mpg") || url.endsWith(".avi")
|
return url.endsWith(".mpeg") || url.endsWith(".mpg") || url.endsWith(".avi") || url.endsWith(".mov")
|
||||||
|| url.endsWith(".mov") || url.endsWith(".mpg4") || url.endsWith(".mp4")
|
|| url.endsWith(".mpg4") || url.endsWith(".mp4") || url.endsWith(".flv") || url.endsWith(".wmv");
|
||||||
|| url.endsWith(".flv") || url.endsWith(".wmv");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isAudio(String url) {
|
public static boolean isAudio(String url) {
|
||||||
return url.endsWith(".mp3") || url.endsWith(".ogg") || url.endsWith(".m3u")
|
return url.endsWith(".mp3") || url.endsWith(".ogg") || url.endsWith(".m3u") || url.endsWith(".wav");
|
||||||
|| url.endsWith(".wav");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isDoc(String url) {
|
public static boolean isDoc(String url) {
|
||||||
@ -241,23 +222,20 @@ public class SHelper {
|
|||||||
|
|
||||||
public static boolean isPackage(String url) {
|
public static boolean isPackage(String url) {
|
||||||
return url.endsWith(".gz") || url.endsWith(".tgz") || url.endsWith(".zip")
|
return url.endsWith(".gz") || url.endsWith(".tgz") || url.endsWith(".zip")
|
||||||
|| url.endsWith(".rar") || url.endsWith(".deb") || url.endsWith(".rpm")
|
|| url.endsWith(".rar") || url.endsWith(".deb") || url.endsWith(".rpm") || url.endsWith(".7z");
|
||||||
|| url.endsWith(".7z");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isApp(String url) {
|
public static boolean isApp(String url) {
|
||||||
return url.endsWith(".exe") || url.endsWith(".bin") || url.endsWith(".bat")
|
return url.endsWith(".exe") || url.endsWith(".bin") || url.endsWith(".bat") || url.endsWith(".dmg");
|
||||||
|| url.endsWith(".dmg");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isImage(String url) {
|
public static boolean isImage(String url) {
|
||||||
return url.endsWith(".png") || url.endsWith(".jpeg") || url.endsWith(".gif")
|
return url.endsWith(".png") || url.endsWith(".jpeg") || url.endsWith(".gif")
|
||||||
|| url.endsWith(".jpg") || url.endsWith(".bmp") || url.endsWith(".ico")
|
|| url.endsWith(".jpg") || url.endsWith(".bmp") || url.endsWith(".ico") || url.endsWith(".eps");
|
||||||
|| url.endsWith(".eps");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* http://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se
|
* @see "http://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se"
|
||||||
*/
|
*/
|
||||||
public static void enableCookieMgmt() {
|
public static void enableCookieMgmt() {
|
||||||
CookieManager manager = new CookieManager();
|
CookieManager manager = new CookieManager();
|
||||||
@ -266,7 +244,7 @@ public class SHelper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* http://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java-urlconnection
|
* @see "http://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java-urlconnection"
|
||||||
*/
|
*/
|
||||||
public static void enableUserAgentOverwrite() {
|
public static void enableUserAgentOverwrite() {
|
||||||
System.setProperty("http.agent", "");
|
System.setProperty("http.agent", "");
|
||||||
@ -377,8 +355,8 @@ public class SHelper {
|
|||||||
} else if (counter == monthCounter + 1) {
|
} else if (counter == monthCounter + 1) {
|
||||||
try {
|
try {
|
||||||
day = Integer.parseInt(str);
|
day = Integer.parseInt(str);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ignored) {
|
||||||
ex.printStackTrace();
|
// ignored
|
||||||
}
|
}
|
||||||
if (day < 1 || day > 31) {
|
if (day < 1 || day > 31) {
|
||||||
day = -1;
|
day = -1;
|
||||||
@ -425,21 +403,11 @@ public class SHelper {
|
|||||||
return dateStr + "/01/01";
|
return dateStr + "/01/01";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// with the help of http://stackoverflow.com/questions/1828775/httpclient-and-ssl
|
||||||
* keep in mind: simpleDateFormatter is not thread safe! call completeDate
|
|
||||||
* before applying this formatter.
|
|
||||||
*/
|
|
||||||
public static SimpleDateFormat createDateFormatter() {
|
|
||||||
return new SimpleDateFormat("yyyy/MM/dd", Locale.getDefault());
|
|
||||||
}
|
|
||||||
|
|
||||||
// with the help of
|
|
||||||
// http://stackoverflow.com/questions/1828775/httpclient-and-ssl
|
|
||||||
public static void enableAnySSL() {
|
public static void enableAnySSL() {
|
||||||
try {
|
try {
|
||||||
SSLContext ctx = SSLContext.getInstance("TLS");
|
SSLContext ctx = SSLContext.getInstance("TLS");
|
||||||
ctx.init(new KeyManager[0], new TrustManager[] { new DefaultTrustManager() },
|
ctx.init(new KeyManager[0], new TrustManager[]{new DefaultTrustManager()}, new SecureRandom());
|
||||||
new SecureRandom());
|
|
||||||
SSLContext.setDefault(ctx);
|
SSLContext.setDefault(ctx);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
ex.printStackTrace();
|
ex.printStackTrace();
|
||||||
@ -449,13 +417,11 @@ public class SHelper {
|
|||||||
private static class DefaultTrustManager implements X509TrustManager {
|
private static class DefaultTrustManager implements X509TrustManager {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkClientTrusted(X509Certificate[] arg0, String arg1)
|
public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
|
||||||
throws CertificateException {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkServerTrusted(X509Certificate[] arg0, String arg1)
|
public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
|
||||||
throws CertificateException {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Loading…
Reference in New Issue
Block a user