/* * Copyright 2011 Peter Karich * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package acr.browser.lightning.reading; import java.io.UnsupportedEncodingException; import java.net.CookieHandler; import java.net.CookieManager; import java.net.CookiePolicy; import java.net.URLDecoder; import java.net.URLEncoder; import java.security.SecureRandom; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.text.SimpleDateFormat; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.net.ssl.KeyManager; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.jsoup.nodes.Element; /** * * @author Peter Karich */ public class SHelper { public static final String UTF8 = "UTF-8"; private static final Pattern SPACE = Pattern.compile(" "); public static String replaceSpaces(String url) { if (!url.isEmpty()) { url = url.trim(); if (url.contains(" ")) { Matcher spaces = SPACE.matcher(url); url = spaces.replaceAll("%20"); } } return url; } public static int count(String str, String substring) { int c = 0; int index1 = str.indexOf(substring); if (index1 >= 0) { c++; c += count(str.substring(index1 + substring.length()), substring); } return c; } /** * remove more than two spaces or newlines */ public static String innerTrim(String str) { if (str.isEmpty()) return ""; StringBuilder sb = new StringBuilder(); boolean previousSpace = false; for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); if (c == ' ' || (int) c == 9 || c == '\n') { previousSpace = true; continue; } if (previousSpace) sb.append(' '); previousSpace = false; sb.append(c); } return sb.toString().trim(); } /** * Starts reading the encoding from the first valid character until an * invalid encoding character occurs. */ public static String encodingCleanup(String str) { StringBuilder sb = new StringBuilder(); boolean startedWithCorrectString = false; for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); if (Character.isDigit(c) || Character.isLetter(c) || c == '-' || c == '_') { startedWithCorrectString = true; sb.append(c); continue; } if (startedWithCorrectString) break; } return sb.toString().trim(); } /** * @return the longest substring as str1.substring(result[0], result[1]); */ public static String getLongestSubstring(String str1, String str2) { int res[] = longestSubstring(str1, str2); if (res == null || res[0] >= res[1]) return ""; return str1.substring(res[0], res[1]); } public static int[] longestSubstring(String str1, String str2) { if (str1 == null || str1.isEmpty() || str2 == null || str2.isEmpty()) return null; // dynamic programming => save already identical length into array // to understand this algo simply print identical length in every entry // of the array // i+1, j+1 then reuses information from i,j // java initializes them already with 0 int[][] num = new int[str1.length()][str2.length()]; int maxlen = 0; int lastSubstrBegin = 0; int endIndex = 0; for (int i = 0; i < str1.length(); i++) { for (int j = 0; j < str2.length(); j++) { if (str1.charAt(i) == str2.charAt(j)) { if ((i == 0) || (j == 0)) num[i][j] = 1; else num[i][j] = 1 + num[i - 1][j - 1]; if (num[i][j] > maxlen) { maxlen = num[i][j]; // generate substring from str1 => i lastSubstrBegin = i - num[i][j] + 1; endIndex = i + 1; } } } } return new int[] { lastSubstrBegin, endIndex }; } public static String getDefaultFavicon(String url) { return useDomainOfFirstArg4Second(url, "/favicon.ico"); } /** * @param urlForDomain * extract the domain from this url * @param path * this url does not have a domain * @return */ public static String useDomainOfFirstArg4Second(String urlForDomain, String path) { if (path.startsWith("http")) return path; if ("favicon.ico".equals(path)) path = "/favicon.ico"; if (path.startsWith("//")) { // wikipedia special case, see tests if (urlForDomain.startsWith("https:")) return "https:" + path; return "http:" + path; } else if (path.startsWith("/")) return "http://" + extractHost(urlForDomain) + path; else if (path.startsWith("../")) { int slashIndex = urlForDomain.lastIndexOf("/"); if (slashIndex > 0 && slashIndex + 1 < urlForDomain.length()) urlForDomain = urlForDomain.substring(0, slashIndex + 1); return urlForDomain + path; } return path; } public static String extractHost(String url) { return extractDomain(url, false); } public static String extractDomain(String url, boolean aggressive) { if (url.startsWith("http://")) url = url.substring("http://".length()); else if (url.startsWith("https://")) url = url.substring("https://".length()); if (aggressive) { if (url.startsWith("www.")) url = url.substring("www.".length()); // strip mobile from start if (url.startsWith("m.")) url = url.substring("m.".length()); } int slashIndex = url.indexOf("/"); if (slashIndex > 0) url = url.substring(0, slashIndex); return url; } public static boolean isVideoLink(String url) { url = extractDomain(url, true); return url.startsWith("youtube.com") || url.startsWith("video.yahoo.com") || url.startsWith("vimeo.com") || url.startsWith("blip.tv"); } public static boolean isVideo(String url) { return url.endsWith(".mpeg") || url.endsWith(".mpg") || url.endsWith(".avi") || url.endsWith(".mov") || url.endsWith(".mpg4") || url.endsWith(".mp4") || url.endsWith(".flv") || url.endsWith(".wmv"); } public static boolean isAudio(String url) { return url.endsWith(".mp3") || url.endsWith(".ogg") || url.endsWith(".m3u") || url.endsWith(".wav"); } public static boolean isDoc(String url) { return url.endsWith(".pdf") || url.endsWith(".ppt") || url.endsWith(".doc") || url.endsWith(".swf") || url.endsWith(".rtf") || url.endsWith(".xls"); } public static boolean isPackage(String url) { return url.endsWith(".gz") || url.endsWith(".tgz") || url.endsWith(".zip") || url.endsWith(".rar") || url.endsWith(".deb") || url.endsWith(".rpm") || url.endsWith(".7z"); } public static boolean isApp(String url) { return url.endsWith(".exe") || url.endsWith(".bin") || url.endsWith(".bat") || url.endsWith(".dmg"); } public static boolean isImage(String url) { return url.endsWith(".png") || url.endsWith(".jpeg") || url.endsWith(".gif") || url.endsWith(".jpg") || url.endsWith(".bmp") || url.endsWith(".ico") || url.endsWith(".eps"); } /** * @see http * ://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se */ public static void enableCookieMgmt() { CookieManager manager = new CookieManager(); manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL); CookieHandler.setDefault(manager); } /** * @see http * ://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java * -urlconnection */ public static void enableUserAgentOverwrite() { System.setProperty("http.agent", ""); } public static String getUrlFromUglyGoogleRedirect(String url) { if (url.startsWith("http://www.google.com/url?")) { url = url.substring("http://www.google.com/url?".length()); String arr[] = urlDecode(url).split("\\&"); if (arr != null) for (String str : arr) { if (str.startsWith("q=")) return str.substring("q=".length()); } } return null; } public static String getUrlFromUglyFacebookRedirect(String url) { if (url.startsWith("http://www.facebook.com/l.php?u=")) { url = url.substring("http://www.facebook.com/l.php?u=".length()); return urlDecode(url); } return null; } public static String urlEncode(String str) { try { return URLEncoder.encode(str, UTF8); } catch (UnsupportedEncodingException ex) { return str; } } public static String urlDecode(String str) { try { return URLDecoder.decode(str, UTF8); } catch (UnsupportedEncodingException ex) { return str; } } /** * Popular sites uses the #! to indicate the importance of the following * chars. Ugly but true. Such as: facebook, twitter, gizmodo, ... */ public static String removeHashbang(String url) { return url.replaceFirst("#!", ""); } public static String printNode(Element root) { return printNode(root, 0); } public static String printNode(Element root, int indentation) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < indentation; i++) { sb.append(' '); } sb.append(root.tagName()); sb.append(":"); sb.append(root.ownText()); sb.append("\n"); for (Element el : root.children()) { sb.append(printNode(el, indentation + 1)); sb.append("\n"); } return sb.toString(); } public static String estimateDate(String url) { int index = url.indexOf("://"); if (index > 0) url = url.substring(index + 3); int year = -1; int yearCounter = -1; int month = -1; int monthCounter = -1; int day = -1; String strs[] = url.split("/"); for (int counter = 0; counter < strs.length; counter++) { String str = strs[counter]; if (str.length() == 4) { try { year = Integer.parseInt(str); } catch (Exception ex) { continue; } if (year < 1970 || year > 3000) { year = -1; continue; } yearCounter = counter; } else if (str.length() == 2) { if (monthCounter < 0 && counter == yearCounter + 1) { try { month = Integer.parseInt(str); } catch (Exception ex) { continue; } if (month < 1 || month > 12) { month = -1; continue; } monthCounter = counter; } else if (counter == monthCounter + 1) { try { day = Integer.parseInt(str); } catch (Exception ex) { } if (day < 1 || day > 31) { day = -1; continue; } break; } } } if (year < 0) return null; StringBuilder str = new StringBuilder(); str.append(year); if (month < 1) return str.toString(); str.append('/'); if (month < 10) str.append('0'); str.append(month); if (day < 1) return str.toString(); str.append('/'); if (day < 10) str.append('0'); str.append(day); return str.toString(); } public static String completeDate(String dateStr) { if (dateStr == null) return null; int index = dateStr.indexOf('/'); if (index > 0) { index = dateStr.indexOf('/', index + 1); if (index > 0) return dateStr; else return dateStr + "/01"; } return dateStr + "/01/01"; } /** * keep in mind: simpleDateFormatter is not thread safe! call completeDate * before applying this formatter. */ public static SimpleDateFormat createDateFormatter() { return new SimpleDateFormat("yyyy/MM/dd", Locale.getDefault()); } // with the help of // http://stackoverflow.com/questions/1828775/httpclient-and-ssl public static void enableAnySSL() { try { SSLContext ctx = SSLContext.getInstance("TLS"); ctx.init(new KeyManager[0], new TrustManager[] { new DefaultTrustManager() }, new SecureRandom()); SSLContext.setDefault(ctx); } catch (Exception ex) { ex.printStackTrace(); } } private static class DefaultTrustManager implements X509TrustManager { @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { } @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { } @Override public X509Certificate[] getAcceptedIssuers() { return null; } } public static int countLetters(String str) { int len = str.length(); int chars = 0; for (int i = 0; i < len; i++) { if (Character.isLetter(str.charAt(i))) chars++; } return chars; } }