Added a Reading Mode that can be accessed from the menu

Reading Mode utilizes the Snacktory library created by karussell, which is licensed under the Apache 2.0 license. https://github.com/karussell/snacktory
2015-02-05 15:33:23 -05:00 · 2015-02-05 15:33:23 -05:00 · 10668a019b
commit 10668a019b
parent 313f9fb105
20 changed files with 2659 additions and 80 deletions
--- a/AndroidManifest.xml
+++ b/AndroidManifest.xml
@ -200,6 +200,17 @@
            <intent-filter>
                <action android:name="android.intent.action.BOOKMARK" />

+                <category android:name="android.intent.category.DEFAULT" />
+            </intent-filter>
+        </activity>
+        <activity
+            android:name="acr.browser.lightning.ReadingActivity"
+            android:configChanges="orientation|screenSize|keyboardHidden|keyboard"
+            android:label="@string/reading_mode"
+            android:theme="@style/Theme.SettingsTheme" >
+            <intent-filter>
+                <action android:name="android.intent.action.READING" />
+
                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>
--- a/libs/jsoup-1.8.1.jar
+++ b/libs/jsoup-1.8.1.jar
--- a/res/layout/license_activity.xml
+++ b/res/layout/license_activity.xml
@ -140,5 +140,39 @@
        android:layout_marginLeft="10dp"
        android:layout_marginRight="10dp"
        android:background="#cdcdcd" />
+    
+    <LinearLayout
+        android:id="@+id/licenseSnactory"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:background="?attr/listChoiceBackgroundIndicator"
+        android:orientation="vertical"
+        android:paddingBottom="10dp"
+        android:paddingTop="10dp" >
+
+        <TextView
+            android:id="@+id/textView5"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:paddingLeft="16dp"
+            android:text="@string/snacktory"
+            android:textAppearance="?android:attr/textAppearanceMedium" />
+
+        <TextView
+            android:id="@+id/textView6"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:paddingLeft="16dp"
+            android:text="@string/apache"
+            android:textAppearance="?android:attr/textAppearanceSmall"
+            android:textColor="@color/light" />
+    </LinearLayout>
+    
+    <LinearLayout
+        android:layout_width="match_parent"
+        android:layout_height="1dp"
+        android:layout_marginLeft="10dp"
+        android:layout_marginRight="10dp"
+        android:background="#cdcdcd" />

 </LinearLayout>
--- a/res/layout/reading_view.xml
+++ b/res/layout/reading_view.xml
@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="utf-8"?>
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent"
+    android:orientation="vertical" >
+
+    <include layout="@layout/toolbar_settings" />
+
+    <ScrollView
+        android:layout_width="match_parent"
+        android:layout_height="match_parent" >
+
+        <LinearLayout
+            android:layout_width="match_parent"
+            android:layout_height="wrap_content" 
+            android:padding="20dp"
+            android:orientation="vertical">
+
+            <TextView
+                android:id="@+id/textViewTitle"
+                android:layout_width="match_parent"
+                android:layout_height="wrap_content"
+                android:layout_marginBottom="20dp"
+                android:gravity="center_horizontal|center_vertical"
+                android:text="Large Text"
+                android:textAppearance="?android:attr/textAppearanceLarge" />
+
+            <TextView
+                android:id="@+id/textViewBody"
+                android:layout_width="match_parent"
+                android:layout_height="wrap_content"
+                android:gravity="start"
+                android:text="Medium Text"
+                android:textAppearance="?android:attr/textAppearanceMedium" />
+
+        </LinearLayout>
+    </ScrollView>
+
+</LinearLayout>
--- a/res/menu-xlarge/main.xml
+++ b/res/menu-xlarge/main.xml
@ -1,73 +1,77 @@
-<!--
-  Copyright 2014 A.C.R. Development
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-->
-
-<menu xmlns:android="http://schemas.android.com/apk/res/android" >
-
-    <item
-        android:id="@+id/action_back"
-        android:checkable="false"
-        android:enabled="true"
-        android:icon="?arrowBackDrawable"
-        android:showAsAction="always"
-        android:title="@string/action_back"
-        android:visible="true">
-    </item>
-    <item
-        android:id="@+id/action_forward"
-        android:checkable="false"
-        android:enabled="true"
-        android:icon="?arrowForwardDrawable"
-        android:showAsAction="always"
-        android:title="@string/action_forward"
-        android:visible="true">
-    </item>
-    <item
-        android:id="@+id/action_new_tab"
-        android:title="@string/action_new_tab">
-    </item>
-    <item
-        android:id="@+id/action_incognito"
-        android:title="@string/action_incognito">
-    </item>
-    <item
-        android:id="@+id/action_share"
-        android:title="@string/action_share"/>
-    <item
-        android:id="@+id/action_history"
-        android:title="@string/action_history">
-    </item>
-    <item
-        android:id="@+id/action_find"
-        android:title="@string/action_find">
-    </item>
-    <item
-        android:id="@+id/action_copy"
-        android:title="@string/action_copy">
-    </item>
-    <item
-        android:id="@+id/action_bookmarks"
-        android:title="@string/action_bookmarks">
-    </item>
-    <item
-        android:id="@+id/action_add_bookmark"
-        android:title="@string/action_add_bookmark">
-    </item>
-    <item
-        android:id="@+id/action_settings"
-        android:title="@string/settings">
-    </item>
-
+<!--
+  Copyright 2014 A.C.R. Development
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<menu xmlns:android="http://schemas.android.com/apk/res/android" >
+
+    <item
+        android:id="@+id/action_back"
+        android:checkable="false"
+        android:enabled="true"
+        android:icon="?arrowBackDrawable"
+        android:showAsAction="always"
+        android:title="@string/action_back"
+        android:visible="true">
+    </item>
+    <item
+        android:id="@+id/action_forward"
+        android:checkable="false"
+        android:enabled="true"
+        android:icon="?arrowForwardDrawable"
+        android:showAsAction="always"
+        android:title="@string/action_forward"
+        android:visible="true">
+    </item>
+    <item
+        android:id="@+id/action_new_tab"
+        android:title="@string/action_new_tab">
+    </item>
+    <item
+        android:id="@+id/action_incognito"
+        android:title="@string/action_incognito">
+    </item>
+    <item
+        android:id="@+id/action_share"
+        android:title="@string/action_share"/>
+    <item
+        android:id="@+id/action_history"
+        android:title="@string/action_history">
+    </item>
+    <item
+        android:id="@+id/action_find"
+        android:title="@string/action_find">
+    </item>
+    <item
+        android:id="@+id/action_copy"
+        android:title="@string/action_copy">
+    </item>
+    <item
+        android:id="@+id/action_bookmarks"
+        android:title="@string/action_bookmarks">
+    </item>
+    <item
+        android:id="@+id/action_add_bookmark"
+        android:title="@string/action_add_bookmark">
+    </item>
+    <item
+        android:id="@+id/action_reading_mode"
+        android:title="@string/reading_mode">
+    </item>
+    <item
+        android:id="@+id/action_settings"
+        android:title="@string/settings">
+    </item>
+
 </menu>
--- a/res/menu/main.xml
+++ b/res/menu/main.xml
@ -23,6 +23,7 @@
    <item android:id="@+id/action_copy" android:title="@string/action_copy" ></item>
    <item android:id="@+id/action_bookmarks" android:title="@string/action_bookmarks" ></item>
    <item android:id="@+id/action_add_bookmark" android:title="@string/action_add_bookmark" ></item>
+    <item android:id="@+id/action_reading_mode" android:title="@string/reading_mode" ></item>
    <item android:id="@+id/action_settings" android:title="@string/settings" ></item>
    
 </menu>
--- a/res/values/strings.xml
+++ b/res/values/strings.xml
@ -198,4 +198,8 @@
    <string name="third_party">Block 3rd Party Cookies</string>
    <string name="available_lollipop">This feature is only available on Android 5.0+</string>
    <string name="color_mode">Enable Color Mode</string>
+    <string name="reading_mode">Reader Mode</string>
+    <string name="loading">Loading&#8230;</string>
+    <string name="loading_failed">Couldn\'t load anything from the page.</string>
+    <string name="snacktory">Snacktory</string>
 </resources>
--- a/src/acr/browser/lightning/BrowserActivity.java
+++ b/src/acr/browser/lightning/BrowserActivity.java
@ -179,7 +179,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 		mDrawerListRight.setDividerHeight(0);
 		setNavigationDrawerWidth();
 		mDrawerLayout.setDrawerListener(new DrawerLocker());
-		
+
 		mWebpageBitmap = BitmapFactory.decodeResource(getResources(), R.drawable.ic_webpage);
 		mActionBar = getSupportActionBar();
 		final TypedArray styledAttributes = mContext.getTheme().obtainStyledAttributes(
@ -350,7 +350,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 				};
 				anim.setDuration(300);
 				anim.setInterpolator(new DecelerateInterpolator());
-				anim.setAnimationListener(new AnimationListener(){
+				anim.setAnimationListener(new AnimationListener() {

 					@Override
 					public void onAnimationStart(Animation animation) {
@ -368,7 +368,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 					@Override
 					public void onAnimationRepeat(Animation animation) {
 					}
-					
+
 				});
 				new Handler().postDelayed(new Runnable() {

@ -488,12 +488,12 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 		checkForTor();

 	}
-	
+
 	private class DrawerLocker implements DrawerListener {

 		@Override
 		public void onDrawerClosed(View v) {
-			if(v == mDrawerRight){
+			if (v == mDrawerRight) {
 				mDrawerLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_UNLOCKED, mDrawerLeft);
 			} else {
 				mDrawerLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_UNLOCKED, mDrawerRight);
@ -502,7 +502,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl

 		@Override
 		public void onDrawerOpened(View v) {
-			if(v == mDrawerRight){
+			if (v == mDrawerRight) {
 				mDrawerLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_LOCKED_CLOSED, mDrawerLeft);
 			} else {
 				mDrawerLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_LOCKED_CLOSED, mDrawerRight);
@ -516,7 +516,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 		@Override
 		public void onDrawerStateChanged(int arg) {
 		}
-		
+
 	}

 	public boolean handleMenuItemClick(MenuItem item) {
@ -596,6 +596,11 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 			case R.id.action_find:
 				findInPage();
 				return true;
+			case R.id.action_reading_mode:
+				Intent read = new Intent(this, ReadingActivity.class);
+				read.putExtra(Constants.LOAD_READING_URL, mCurrentView.getUrl());
+				startActivity(read);
+				return true;
 			default:
 				return super.onOptionsItemSelected(item);
 		}
@ -912,6 +917,11 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 			case R.id.action_find:
 				findInPage();
 				return true;
+			case R.id.action_reading_mode:
+				Intent read = new Intent(this, ReadingActivity.class);
+				read.putExtra(Constants.LOAD_READING_URL, mCurrentView.getUrl());
+				startActivity(read);
+				return true;
 			default:
 				return super.onOptionsItemSelected(item);
 		}
@ -1622,7 +1632,7 @@ public class BrowserActivity extends ActionBarActivity implements BrowserControl
 				}

 			});
-			
+
 			ViewCompat.jumpDrawablesToCurrentState(holder.exit);

 			LightningView web = data.get(position);
--- a/src/acr/browser/lightning/Constants.java
+++ b/src/acr/browser/lightning/Constants.java
@ -29,6 +29,8 @@ public final class Constants {
 	public static final String JAVASCRIPT_INVERT_PAGE = "javascript:(function(){var e='img {-webkit-filter: invert(100%);'+'-moz-filter: invert(100%);'+'-o-filter: invert(100%);'+'-ms-filter: invert(100%); }',t=document.getElementsByTagName('head')[0],n=document.createElement('style');if(!window.counter){window.counter=1}else{window.counter++;if(window.counter%2==0){var e='html {-webkit-filter: invert(0%); -moz-filter: invert(0%); -o-filter: invert(0%); -ms-filter: invert(0%); }'}}n.type='text/css';if(n.styleSheet){n.styleSheet.cssText=e}else{n.appendChild(document.createTextNode(e))}t.appendChild(n)})();";
 	public static final String JAVASCRIPT_TEXT_REFLOW = "javascript:document.getElementsByTagName('body')[0].style.width=window.innerWidth+'px';";
 	
+	public static final String LOAD_READING_URL = "ReadingUrl";
+	
 	public static final String SEPARATOR = "\\|\\$\\|SEPARATOR\\|\\$\\|";
 	public static final String HTTP = "http://";
 	public static final String HTTPS = "https://";
--- a/src/acr/browser/lightning/LicenseActivity.java
+++ b/src/acr/browser/lightning/LicenseActivity.java
@ -30,6 +30,7 @@ public class LicenseActivity extends ActionBarActivity implements View.OnClickLi
 		findViewById(R.id.licenseAOSP).setOnClickListener(this);
 		findViewById(R.id.licenseHosts).setOnClickListener(this);
 		findViewById(R.id.licenseOrbot).setOnClickListener(this);
+		findViewById(R.id.licenseSnactory).setOnClickListener(this);
 	}

 	@Override
@ -47,6 +48,9 @@ public class LicenseActivity extends ActionBarActivity implements View.OnClickLi
 			case R.id.licenseOrbot:
 				actionView("http://www.gnu.org/licenses/lgpl.html");
 				break;
+			case R.id.licenseSnactory:
+				actionView("http://www.apache.org/licenses/LICENSE-2.0");
+				break;
 		}
 	}

--- a/src/acr/browser/lightning/Reading/ArticleTextExtractor.java
+++ b/src/acr/browser/lightning/Reading/ArticleTextExtractor.java
@ -0,0 +1,619 @@
+package acr.browser.lightning.Reading;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import android.util.Log;
+
+/**
+ * This class is thread safe.
+ * 
+ * @author Alex P (ifesdjeen from jreadability)
+ * @author Peter Karich
+ */
+public class ArticleTextExtractor {
+
+	// Tag names of the elements that are considered as article-text candidates
+	private static final Pattern NODES = Pattern.compile("p|div|td|h1|h2|article|section");
+	// Unlikely candidates (source string is kept so addUnlikely can extend it)
+	private String unlikelyStr;
+	private Pattern UNLIKELY;
+	// Most likely positive candidates
+	private String positiveStr;
+	private Pattern POSITIVE;
+	// Most likely negative candidates
+	private String negativeStr;
+	private Pattern NEGATIVE;
+	// Inline-style fragments that lower an element's weight
+	private static final Pattern NEGATIVE_STYLE = Pattern
+			.compile("hidden|display: ?none|font-size: ?small");
+	// Lower-cased title segments that cleanTitle drops; built from a plain list
+	// instead of double-brace initialization to avoid an anonymous subclass
+	private static final Set<String> IGNORED_TITLE_PARTS = new LinkedHashSet<String>(
+			Arrays.asList("hacker news", "facebook"));
+	private static final OutputFormatter DEFAULT_FORMATTER = new OutputFormatter();
+	private OutputFormatter formatter = DEFAULT_FORMATTER;
+
+	/**
+	 * Creates an extractor with the default unlikely / positive / negative
+	 * patterns that are matched against element class names and ids.
+	 */
+	public ArticleTextExtractor() {
+		// NOTE(review): there is no '|' between "sponsor" and "a(d|ll|...)", so the
+		// concatenated pattern contains "sponsora(d|ll|gegate|rchive|ttachment)".
+		// This looks unintended, but adding the separator would change scoring
+		// (e.g. "ad" would match "header") -- confirm before changing.
+		setUnlikely("com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|"
+				+ "header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsor"
+				+ "a(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|"
+				+ "login|si(debar|gn|ngle)");
+		setPositive("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))"
+				+ "|arti(cle|kel)|instapaper_body");
+		setNegative("nav($|igation)|user|com(ment|bx)|(^com-)|contact|"
+				+ "foot|masthead|(me(dia|ta))|outbrain|promo|related|scroll|(sho(utbox|pping))|"
+				+ "sidebar|sponsor|tags|tool|widget|player|disclaimer|toc|infobox|vcard");
+	}
+
+	/**
+	 * Replaces the "unlikely" regular expression.
+	 * 
+	 * @param unlikelyStr
+	 *            regex source; stored so that addUnlikely can extend it
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor setUnlikely(String unlikelyStr) {
+		this.unlikelyStr = unlikelyStr;
+		this.UNLIKELY = Pattern.compile(this.unlikelyStr);
+		return this;
+	}
+
+	/**
+	 * Appends an alternative to the current "unlikely" regex (joined with '|')
+	 * and recompiles it.
+	 * 
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor addUnlikely(String unlikelyMatches) {
+		return setUnlikely(unlikelyStr + "|" + unlikelyMatches);
+	}
+
+	/**
+	 * Replaces the "positive" regular expression.
+	 * 
+	 * @param positiveStr
+	 *            regex source; stored so that addPositive can extend it
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor setPositive(String positiveStr) {
+		this.positiveStr = positiveStr;
+		this.POSITIVE = Pattern.compile(this.positiveStr);
+		return this;
+	}
+
+	/**
+	 * Appends an alternative to the current "positive" regex (joined with '|')
+	 * and recompiles it.
+	 * 
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor addPositive(String pos) {
+		return setPositive(positiveStr + "|" + pos);
+	}
+
+	/**
+	 * Replaces the "negative" regular expression.
+	 * 
+	 * @param negativeStr
+	 *            regex source; stored so that addNegative can extend it
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor setNegative(String negativeStr) {
+		this.negativeStr = negativeStr;
+		this.NEGATIVE = Pattern.compile(this.negativeStr);
+		return this;
+	}
+
+	/**
+	 * Appends an alternative to the current "negative" regex (joined with '|')
+	 * and recompiles it.
+	 * 
+	 * @return this extractor, for chaining
+	 */
+	public ArticleTextExtractor addNegative(String neg) {
+		setNegative(negativeStr + "|" + neg);
+		return this;
+	}
+
+	/**
+	 * Sets the formatter used by the extractContent overloads that do not take
+	 * an explicit formatter argument.
+	 */
+	public void setOutputFormatter(OutputFormatter formatter) {
+		this.formatter = formatter;
+	}
+
+	/**
+	 * Extracts the article from an already parsed document using this
+	 * extractor's current formatter. Wasn't tested with improper HTML, although
+	 * jSoup should be able to handle minor stuff.
+	 * 
+	 * @param doc
+	 *            parsed document to extract the article from
+	 * @return a new JResult filled by the extraction
+	 */
+	public JResult extractContent(Document doc) throws Exception {
+		return extractContent(new JResult(), doc, formatter);
+	}
+
+	/**
+	 * Extracts the article from a parsed document into a new JResult, using
+	 * the given formatter instead of this extractor's own.
+	 */
+	public JResult extractContent(Document doc, OutputFormatter formatter) throws Exception {
+		return extractContent(new JResult(), doc, formatter);
+	}
+
+	/**
+	 * Extracts the article from an HTML string into a new JResult.
+	 */
+	public JResult extractContent(String html) throws Exception {
+		return extractContent(new JResult(), html);
+	}
+
+	/**
+	 * Extracts the article from an HTML string into the supplied result,
+	 * using this extractor's current formatter.
+	 */
+	public JResult extractContent(JResult res, String html) throws Exception {
+		return extractContent(res, html, formatter);
+	}
+
+	/**
+	 * Parses the HTML string with jsoup and extracts the article into the
+	 * supplied result.
+	 * 
+	 * @throws IllegalArgumentException
+	 *             if the html string is empty
+	 */
+	public JResult extractContent(JResult res, String html, OutputFormatter formatter)
+			throws Exception {
+		if (html.isEmpty()) {
+			throw new IllegalArgumentException("html string is empty!?");
+		}
+
+		// selector syntax: http://jsoup.org/cookbook/extracting-data/selector-syntax
+		Document parsed = Jsoup.parse(html);
+		return extractContent(res, parsed, formatter);
+	}
+
+	/**
+	 * Core extraction routine: fills the given result with title, description,
+	 * canonical url, main text, images and further metadata of the document.
+	 * The document is modified in the process (script/style removal, score
+	 * attributes).
+	 * 
+	 * @param res
+	 *            result object to fill; also returned
+	 * @param doc
+	 *            parsed document, must not be null
+	 * @param formatter
+	 *            formatter used to turn the best matching element into text
+	 * @return the same res instance
+	 * @throws NullPointerException
+	 *             if doc is null
+	 */
+	public JResult extractContent(JResult res, Document doc, OutputFormatter formatter)
+			throws Exception {
+		if (doc == null)
+			throw new NullPointerException("missing document");
+
+		// metadata is read before the document gets stripped below
+		res.setTitle(extractTitle(doc));
+		res.setDescription(extractDescription(doc));
+		res.setCanonicalUrl(extractCanonicalUrl(doc));
+
+		// now remove the clutter
+		prepareDocument(doc);
+
+		// init elements
+		Collection<Element> nodes = getNodes(doc);
+		int maxWeight = 0;
+		Element bestMatchElement = null;
+		for (Element entry : nodes) {
+			int currentWeight = getWeight(entry);
+			if (currentWeight > maxWeight) {
+				maxWeight = currentWeight;
+				bestMatchElement = entry;
+				// stop searching once a candidate scores above 200
+				if (maxWeight > 200)
+					break;
+			}
+		}
+
+		if (bestMatchElement != null) {
+			List<ImageResult> images = new ArrayList<ImageResult>();
+			Element imgEl = determineImageSource(bestMatchElement, images);
+			if (imgEl != null) {
+				res.setImageUrl(SHelper.replaceSpaces(imgEl.attr("src")));
+				// TODO remove parent container of image if it is contained in
+				// bestMatchElement
+				// to avoid image subtitles flooding in
+
+				res.setImages(images);
+			}
+
+			// clean before grabbing text
+			String text = formatter.getFormattedText(bestMatchElement);
+			text = removeTitleFromText(text, res.getTitle());
+			// this fails for short facebook post and probably tweets:
+			// text.length() > res.getDescription().length()
+			if (text.length() > res.getTitle().length()) {
+				res.setText(text);
+				// print("best element:", bestMatchElement);
+			}
+			res.setTextList(formatter.getTextList(bestMatchElement));
+		}
+
+		// fall back to the image announced in the page metadata
+		if (res.getImageUrl().isEmpty()) {
+			res.setImageUrl(extractImageUrl(doc));
+		}
+
+		res.setRssUrl(extractRssUrl(doc));
+		res.setVideoUrl(extractVideoUrl(doc));
+		res.setFaviconUrl(extractFaviconUrl(doc));
+		res.setKeywords(extractKeywords(doc));
+		return res;
+	}
+
+	/**
+	 * Determines the page title. Sources are tried in this order and the first
+	 * non-empty one wins: cleaned document title, the head title element, then
+	 * the "title", "og:title" and "twitter:title" meta tags.
+	 * 
+	 * @return the title, or the (possibly empty) twitter:title value if
+	 *         nothing else was found
+	 */
+	protected String extractTitle(Document doc) {
+		String candidate = cleanTitle(doc.title());
+		if (!candidate.isEmpty())
+			return candidate;
+
+		candidate = SHelper.innerTrim(doc.select("head title").text());
+		if (!candidate.isEmpty())
+			return candidate;
+
+		candidate = SHelper.innerTrim(doc.select("head meta[name=title]").attr("content"));
+		if (!candidate.isEmpty())
+			return candidate;
+
+		candidate = SHelper.innerTrim(doc.select("head meta[property=og:title]").attr("content"));
+		if (!candidate.isEmpty())
+			return candidate;
+
+		return SHelper.innerTrim(doc.select("head meta[name=twitter:title]").attr("content"));
+	}
+
+	/**
+	 * Reads the canonical url from the head: the canonical link first, then
+	 * the "og:url" and "twitter:url" meta tags. The first non-empty value wins.
+	 */
+	protected String extractCanonicalUrl(Document doc) {
+		String canonical = SHelper.replaceSpaces(doc.select("head link[rel=canonical]").attr(
+				"href"));
+		if (!canonical.isEmpty())
+			return canonical;
+
+		canonical = SHelper.replaceSpaces(doc.select("head meta[property=og:url]").attr(
+				"content"));
+		if (!canonical.isEmpty())
+			return canonical;
+
+		return SHelper.replaceSpaces(doc.select("head meta[name=twitter:url]").attr("content"));
+	}
+
+	/**
+	 * Reads the page description from the head meta tags "description",
+	 * "og:description" and "twitter:description"; the first non-empty value
+	 * wins.
+	 */
+	protected String extractDescription(Document doc) {
+		String summary = SHelper.innerTrim(doc.select("head meta[name=description]").attr(
+				"content"));
+		if (!summary.isEmpty())
+			return summary;
+
+		summary = SHelper.innerTrim(doc.select("head meta[property=og:description]").attr(
+				"content"));
+		if (!summary.isEmpty())
+			return summary;
+
+		return SHelper.innerTrim(doc.select("head meta[name=twitter:description]").attr(
+				"content"));
+	}
+
+	/**
+	 * Splits the content of the "keywords" meta tag at commas. Surrounding
+	 * square brackets are dropped first.
+	 * 
+	 * @return the keywords, or an empty list if there are none
+	 */
+	protected Collection<String> extractKeywords(Document doc) {
+		String raw = SHelper.innerTrim(doc.select("head meta[name=keywords]").attr("content"));
+		if (raw == null)
+			return Collections.emptyList();
+
+		boolean bracketed = raw.startsWith("[") && raw.endsWith("]");
+		if (bracketed)
+			raw = raw.substring(1, raw.length() - 1);
+
+		String[] parts = raw.split("\\s*,\\s*");
+		// a single empty entry means the tag was missing or blank
+		boolean hasKeyword = parts.length > 1 || (parts.length > 0 && !"".equals(parts[0]));
+		if (!hasKeyword)
+			return Collections.emptyList();
+
+		return Arrays.asList(parts);
+	}
+
+	/**
+	 * Metadata fallback used when determineImageSource found no image. Tries
+	 * "og:image", "twitter:image", the image_src link and finally the
+	 * "thumbnail" meta tag.
+	 * 
+	 * @return image url or empty str
+	 */
+	protected String extractImageUrl(Document doc) {
+		// open graph tag has the highest priority
+		String found = SHelper.replaceSpaces(doc.select("head meta[property=og:image]").attr(
+				"content"));
+		if (!found.isEmpty())
+			return found;
+
+		found = SHelper.replaceSpaces(doc.select("head meta[name=twitter:image]").attr(
+				"content"));
+		if (!found.isEmpty())
+			return found;
+
+		// the link is preferred over the thumbnail meta tag
+		found = SHelper.replaceSpaces(doc.select("link[rel=image_src]").attr("href"));
+		if (!found.isEmpty())
+			return found;
+
+		return SHelper.replaceSpaces(doc.select("head meta[name=thumbnail]").attr("content"));
+	}
+
+	/**
+	 * Returns the href of the alternate link with type application/rss+xml,
+	 * passed through SHelper.replaceSpaces.
+	 */
+	protected String extractRssUrl(Document doc) {
+		return SHelper.replaceSpaces(doc.select("link[rel=alternate]")
+				.select("link[type=application/rss+xml]").attr("href"));
+	}
+
+	/**
+	 * Returns the content of the "og:video" meta tag, passed through
+	 * SHelper.replaceSpaces.
+	 */
+	protected String extractVideoUrl(Document doc) {
+		return SHelper.replaceSpaces(doc.select("head meta[property=og:video]").attr("content"));
+	}
+
+	/**
+	 * Returns the favicon url: the rel=icon link from the head, otherwise a
+	 * link whose rel starts with "shortcut" or ends with "icon".
+	 */
+	protected String extractFaviconUrl(Document doc) {
+		String icon = SHelper.replaceSpaces(doc.select("head link[rel=icon]").attr("href"));
+		if (!icon.isEmpty())
+			return icon;
+
+		return SHelper.replaceSpaces(doc.select("head link[rel^=shortcut],link[rel$=icon]")
+				.attr("href"));
+	}
+
+	/**
+	 * Computes the total weight of an element: the class/id/style based weight
+	 * from calcWeight, plus one point per ten characters of own text, plus the
+	 * weight contributed by its direct children.
+	 * 
+	 * @param e
+	 *            Element to weight, along with child nodes
+	 */
+	protected int getWeight(Element e) {
+		int total = calcWeight(e);
+		int ownTextBonus = (int) Math.round(e.ownText().length() / 100.0 * 10);
+		total += ownTextBonus;
+		total += weightChildNodes(e);
+		return total;
+	}
+
+	/**
+	 * Weights the direct children of the given element. Children with fewer
+	 * than 20 characters of own text are ignored. A child with more than 200
+	 * characters adds max(50, length / 10); h1/h2 children add 30; div/p
+	 * children add the value computed by calcWeightForChild. A div/p child with
+	 * class "caption" adds another 30 once.
+	 * <p>
+	 * If at least two paragraphs with more than 50 characters were found, all
+	 * children are visited again: headings (h1-h6) add 20 to the weight, while
+	 * table/li/td/th children get their score attribute lowered by 30 and p
+	 * children get it raised by 30.
+	 * <p>
+	 * Tag names are compared exactly. The previous substring check
+	 * ("table;li;td;th".contains(tag)) also matched tags such as a, b and i.
+	 * 
+	 * @param rootEl
+	 *            Element whose child nodes will be weighted
+	 * @return the weight contributed by the children
+	 */
+	protected int weightChildNodes(Element rootEl) {
+		int weight = 0;
+		boolean hasCaption = false;
+		int longParagraphs = 0;
+		for (Element child : rootEl.children()) {
+			String ownText = child.ownText();
+			int ownTextLength = ownText.length();
+			if (ownTextLength < 20)
+				continue;
+
+			if (ownTextLength > 200)
+				weight += Math.max(50, ownTextLength / 10);
+
+			String tag = child.tagName();
+			if (tag.equals("h1") || tag.equals("h2")) {
+				weight += 30;
+			} else if (tag.equals("div") || tag.equals("p")) {
+				weight += calcWeightForChild(child, ownText);
+				if (tag.equals("p") && ownTextLength > 50)
+					longParagraphs++;
+
+				// case-insensitive without depending on the default locale
+				if ("caption".equalsIgnoreCase(child.className()))
+					hasCaption = true;
+			}
+		}
+
+		// use caption and image
+		if (hasCaption)
+			weight += 30;
+
+		if (longParagraphs >= 2) {
+			List<String> headingTags = Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6");
+			List<String> tabularTags = Arrays.asList("table", "li", "td", "th");
+			for (Element subEl : rootEl.children()) {
+				String tag = subEl.tagName();
+				if (headingTags.contains(tag)) {
+					weight += 20;
+				} else if (tabularTags.contains(tag)) {
+					addScore(subEl, -30);
+				}
+
+				if ("p".equals(tag))
+					addScore(subEl, 30);
+			}
+		}
+		return weight;
+	}
+
+	/**
+	 * Adds the given amount to the score stored on the element.
+	 */
+	public void addScore(Element el, int score) {
+		int old = getScore(el);
+		setScore(el, score + old);
+	}
+
+	/**
+	 * Reads the score stored in the element's "gravityScore" attribute.
+	 * 
+	 * @return the stored score, or 0 if the attribute cannot be read or parsed
+	 */
+	public int getScore(Element el) {
+		int old = 0;
+		try {
+			old = Integer.parseInt(el.attr("gravityScore"));
+		} catch (Exception ex) {
+			// deliberately ignored: any failure means "no score yet", i.e. 0
+		}
+		return old;
+	}
+
+	/**
+	 * Stores the score on the element as its "gravityScore" attribute.
+	 */
+	public void setScore(Element el, int score) {
+		el.attr("gravityScore", Integer.toString(score));
+	}
+
+	/**
+	 * Scores a single child by its own text: one point per 25 characters, or
+	 * -30 if the text contains more than five occurrences of "&quot;", "&lt;",
+	 * "&gt;" or "px" in total. The value is also added to the child's stored
+	 * score.
+	 * 
+	 * @return the value that was added
+	 */
+	private int calcWeightForChild(Element child, String ownText) {
+		int markupHits = SHelper.count(ownText, "&quot;") + SHelper.count(ownText, "&lt;")
+				+ SHelper.count(ownText, "&gt;") + SHelper.count(ownText, "px");
+		int val = markupHits > 5 ? -30 : (int) Math.round(ownText.length() / 25.0);
+
+		addScore(child, val);
+		return val;
+	}
+
+	/**
+	 * Weights an element by its own attributes only: class name and id are
+	 * matched against the positive (+35 / +40), unlikely (-20 each) and
+	 * negative (-50 each) patterns; a style attribute matching NEGATIVE_STYLE
+	 * costs another 50.
+	 */
+	private int calcWeight(Element e) {
+		String className = e.className();
+		String id = e.id();
+		int weight = 0;
+
+		if (POSITIVE.matcher(className).find())
+			weight += 35;
+		if (POSITIVE.matcher(id).find())
+			weight += 40;
+
+		if (UNLIKELY.matcher(className).find())
+			weight -= 20;
+		if (UNLIKELY.matcher(id).find())
+			weight -= 20;
+
+		if (NEGATIVE.matcher(className).find())
+			weight -= 50;
+		if (NEGATIVE.matcher(id).find())
+			weight -= 50;
+
+		String style = e.attr("style");
+		boolean hasStyle = style != null && !style.isEmpty();
+		if (hasStyle && NEGATIVE_STYLE.matcher(style).find())
+			weight -= 50;
+
+		return weight;
+	}
+
+	/**
+	 * Picks the most relevant image for the given element and collects all
+	 * considered images. Images are searched below the element or, if there
+	 * are none, below its parent. Images without src, or whose url is
+	 * classified as an ad by isAdImage, are skipped.
+	 * <p>
+	 * Each image is weighted by its height/width attributes (>= 50 adds 20,
+	 * smaller subtracts 20), long alt/title texts (+20 each) and a "nofollow"
+	 * rel on its parent (-40). Every time a new best image is found, the
+	 * weights of the following images are halved.
+	 * 
+	 * @param el
+	 *            element to search; an element without parent is tolerated
+	 * @param images
+	 *            output list, receives every considered image and is sorted by
+	 *            descending weight
+	 * @return the best image element, or null if none had a positive weight
+	 */
+	public Element determineImageSource(Element el, List<ImageResult> images) {
+		int maxWeight = 0;
+		Element maxNode = null;
+		Elements els = el.select("img");
+		// fall back to the parent's images; a root element has no parent
+		if (els.isEmpty() && el.parent() != null)
+			els = el.parent().select("img");
+
+		double score = 1;
+		for (Element e : els) {
+			String sourceUrl = e.attr("src");
+			if (sourceUrl.isEmpty() || isAdImage(sourceUrl))
+				continue;
+
+			int weight = 0;
+			int height = 0;
+			try {
+				height = Integer.parseInt(e.attr("height"));
+				if (height >= 50)
+					weight += 20;
+				else
+					weight -= 20;
+			} catch (NumberFormatException ignored) {
+				// missing or non-numeric height: no contribution
+			}
+
+			int width = 0;
+			try {
+				width = Integer.parseInt(e.attr("width"));
+				if (width >= 50)
+					weight += 20;
+				else
+					weight -= 20;
+			} catch (NumberFormatException ignored) {
+				// missing or non-numeric width: no contribution
+			}
+			String alt = e.attr("alt");
+			if (alt.length() > 35)
+				weight += 20;
+
+			String title = e.attr("title");
+			if (title.length() > 35)
+				weight += 20;
+
+			boolean noFollow = false;
+			if (e.parent() != null) {
+				String rel = e.parent().attr("rel");
+				if (rel != null && rel.contains("nofollow")) {
+					noFollow = true;
+					weight -= 40;
+				}
+			}
+
+			weight = (int) (weight * score);
+			if (weight > maxWeight) {
+				maxWeight = weight;
+				maxNode = e;
+				// later images have to be clearly better to win
+				score = score / 2;
+			}
+
+			ImageResult image = new ImageResult(sourceUrl, weight, title, height, width, alt,
+					noFollow);
+			images.add(image);
+		}
+
+		Collections.sort(images, new ImageComparator());
+		return maxNode;
+	}
+
+	/**
+	 * Prepares the document for weighting. Currently this only removes script,
+	 * noscript and style elements; stripping of unlikely candidates is
+	 * disabled (see the commented-out call below).
+	 * 
+	 * @param doc
+	 *            document to prepare. Passed as reference, and changed inside
+	 *            of function
+	 */
+	protected void prepareDocument(Document doc) {
+		// stripUnlikelyCandidates(doc);
+		removeScriptsAndStyles(doc);
+	}
+
+	/**
+	 * Removes every element below body whose lower-cased class name or id
+	 * matches the NEGATIVE pattern.
+	 * <p>
+	 * Lower-casing uses Locale.ROOT so the result does not depend on the
+	 * device language (with a Turkish default locale "I" would become a
+	 * dotless "ı" and e.g. "SIDEBAR" would no longer match).
+	 * 
+	 * @param doc
+	 *            document to strip the candidates from
+	 */
+	protected void stripUnlikelyCandidates(Document doc) {
+		for (Element child : doc.select("body").select("*")) {
+			String className = child.className().toLowerCase(Locale.ROOT);
+			String id = child.id().toLowerCase(Locale.ROOT);
+
+			if (NEGATIVE.matcher(className).find() || NEGATIVE.matcher(id).find()) {
+				// print("REMOVE:", child);
+				child.remove();
+			}
+		}
+	}
+
+	/**
+	 * Removes all script, noscript and style elements (in that order) from the
+	 * document.
+	 * 
+	 * @return the same document instance
+	 */
+	private Document removeScriptsAndStyles(Document doc) {
+		String[] clutterTags = { "script", "noscript", "style" };
+		for (String tagName : clutterTags) {
+			Elements matches = doc.getElementsByTag(tagName);
+			for (Element match : matches) {
+				match.remove();
+			}
+		}
+		return doc;
+	}
+
+	/**
+	 * Heuristic ad check: true if SHelper.count reports at least two
+	 * occurrences of "ad" in the image url.
+	 */
+	private boolean isAdImage(String imageUrl) {
+		return SHelper.count(imageUrl, "ad") >= 2;
+	}
+
+	/**
+	 * Currently a no-op: returns the text unchanged. Cutting the title out of
+	 * the text (see the disabled code below) was turned off because the result
+	 * was terrible to read.
+	 * 
+	 * @param text
+	 *            extracted article text
+	 * @param title
+	 *            article title (unused at the moment)
+	 * @return the unmodified text
+	 */
+	public String removeTitleFromText(String text, String title) {
+		// don't do this as its terrible to read
+		// int index1 = text.toLowerCase().indexOf(title.toLowerCase());
+		// if (index1 >= 0)
+		// text = text.substring(index1 + title.length());
+		// return text.trim();
+		return text;
+	}
+
+	/**
+	 * @return a set of all important nodes
+	 */
+	public Collection<Element> getNodes(Document doc) {
+		Set<Element> nodes = new HashSet<Element>(64);
+		int score = 100;
+		for (Element el : doc.select("body").select("*")) {
+			if (NODES.matcher(el.tagName()).matches()) {
+				nodes.add(el);
+				setScore(el, score);
+				score = score / 2;
+			}
+		}
+		return nodes;
+
+	}
+
+	/**
+	 * Cleans a raw document title. The title is split at '|'; segments listed
+	 * in IGNORED_TITLE_PARTS (compared lower-cased and trimmed) are dropped. A
+	 * segment is also dropped when the number of segments kept so far equals
+	 * the last segment index and the text built so far is longer than that
+	 * segment. The remaining segments are re-joined with '|' and passed
+	 * through SHelper.innerTrim.
+	 */
+	public String cleanTitle(String title) {
+		String[] segments = title.split("\\|");
+		int lastIndex = segments.length - 1;
+		StringBuilder cleaned = new StringBuilder();
+		int kept = 0;
+
+		for (String segment : segments) {
+			String normalized = segment.toLowerCase(Locale.getDefault()).trim();
+			if (IGNORED_TITLE_PARTS.contains(normalized))
+				continue;
+
+			boolean shortTrailingSegment = kept == lastIndex
+					&& cleaned.length() > segment.length();
+			if (shortTrailingSegment)
+				continue;
+
+			if (kept > 0)
+				cleaned.append("|");
+
+			cleaned.append(segment);
+			kept++;
+		}
+
+		return SHelper.innerTrim(cleaned.toString());
+	}
+
+	/**
+	 * Comparator that orders images by descending weight, so that the
+	 * heaviest image comes first after sorting.
+	 * 
+	 * @author Chris Alexander, chris@chris-alexander.co.uk
+	 * 
+	 */
+	public class ImageComparator implements Comparator<ImageResult> {
+
+		@Override
+		public int compare(ImageResult o1, ImageResult o2) {
+			// Returns the highest weight first (arguments are swapped on purpose)
+			return o2.weight.compareTo(o1.weight);
+		}
+	}
+}
--- a/src/acr/browser/lightning/Reading/Converter.java
+++ b/src/acr/browser/lightning/Reading/Converter.java
@ -0,0 +1,243 @@
+/*
+ *  Copyright 2011 Peter Karich 
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package acr.browser.lightning.Reading;
+
+import java.io.*;
+import java.net.SocketTimeoutException;
+import java.nio.charset.Charset;
+import java.util.Locale;
+
+import acr.browser.lightning.Constants;
+import android.util.Log;
+
+/**
+ * Turns the body of an HTTP response into a String. The encoding suggested by
+ * the caller (usually taken from the Content-Type header) is refined by
+ * looking for a {@code charset=} or {@code encoding=} declaration in the first
+ * bytes of the document.
+ * <p>
+ * This class is not thread safe. Use one new instance every time due to
+ * encoding variable.
+ * 
+ * @author Peter Karich
+ */
+public class Converter {
+
+	public final static String UTF8 = "UTF-8";
+	public final static String ISO = "ISO-8859-1";
+	/** Size in bytes of the read buffer and of one charset-sniffing chunk. */
+	public final static int K2 = 2048;
+	/** Reading stops once at least this many bytes have been collected. */
+	private int maxBytes = 1000000 / 2;
+	/** Encoding chosen by the most recent streamToString call. */
+	private String encoding;
+	/** Only used to make log messages more helpful. */
+	private String url;
+
+	/**
+	 * @param urlOnlyHint
+	 *            url of the document, used in log output only
+	 */
+	public Converter(String urlOnlyHint) {
+		url = urlOnlyHint;
+	}
+
+	public Converter() {
+	}
+
+	/**
+	 * Sets the byte limit used by the streamToString variants that do not take
+	 * an explicit limit.
+	 * 
+	 * @return this instance, for chaining
+	 */
+	public Converter setMaxBytes(int maxBytes) {
+		this.maxBytes = maxBytes;
+		return this;
+	}
+
+	/**
+	 * Extracts the charset parameter from a Content-Type header value such as
+	 * {@code text/html; charset=UTF-8}.
+	 * 
+	 * @param contentType
+	 *            header value, may be null
+	 * @return the lower-cased charset name without surrounding quotes, or
+	 *         {@link #ISO} if the header does not name one
+	 */
+	public static String extractEncoding(String contentType) {
+		String[] values;
+		if (contentType != null)
+			values = contentType.split(";");
+		else
+			values = new String[0];
+
+		String charset = "";
+
+		for (String value : values) {
+			// Locale.ROOT: charset names are ASCII tokens. Lower-casing with a
+			// default locale such as Turkish would turn "ISO-8859-1" into
+			// "ıso-8859-1", which no longer names a charset.
+			value = value.trim().toLowerCase(Locale.ROOT);
+
+			if (value.startsWith("charset="))
+				charset = stripQuotes(value.substring("charset=".length()));
+		}
+
+		// http1.1 says ISO-8859-1 is the default charset
+		if (charset.length() == 0)
+			charset = ISO;
+
+		return charset;
+	}
+
+	/**
+	 * Removes one pair of matching single or double quotes around a header
+	 * parameter value, e.g. {@code "utf-8"} becomes {@code utf-8}.
+	 */
+	private static String stripQuotes(String value) {
+		if (value.length() >= 2) {
+			char first = value.charAt(0);
+			char last = value.charAt(value.length() - 1);
+			if (first == last && (first == '"' || first == '\''))
+				return value.substring(1, value.length() - 1);
+		}
+		return value;
+	}
+
+	/**
+	 * @return the lower-cased encoding chosen by the last streamToString call,
+	 *         or an empty string if none has been chosen yet
+	 */
+	public String getEncoding() {
+		if (encoding == null)
+			return "";
+		return encoding.toLowerCase(Locale.ROOT);
+	}
+
+	/**
+	 * Reads the stream using the configured byte limit and the encoding
+	 * currently stored in this instance.
+	 */
+	public String streamToString(InputStream is) {
+		return streamToString(is, maxBytes, encoding);
+	}
+
+	/**
+	 * Reads the stream using the configured byte limit and the given encoding
+	 * hint.
+	 */
+	public String streamToString(InputStream is, String enc) {
+		return streamToString(is, maxBytes, enc);
+	}
+
+	/**
+	 * Reads bytes off the stream and returns them decoded as a string. The
+	 * stream is closed afterwards.
+	 * 
+	 * @param is
+	 *            the stream to read
+	 * @param maxBytes
+	 *            The max bytes that we want to read from the input stream. The
+	 *            limit is checked before each chunk is read, so slightly more
+	 *            may be returned.
+	 * @param enc
+	 *            encoding hint; UTF-8 is assumed if null or empty. A charset
+	 *            declared inside the document takes precedence.
+	 * @return the decoded content, or an empty string if reading failed
+	 */
+	public String streamToString(InputStream is, int maxBytes, String enc) {
+		encoding = enc;
+		// Http 1.1. standard is iso-8859-1 not utf8 :(
+		// but we force utf-8 as youtube assumes it ;)
+		if (encoding == null || encoding.isEmpty())
+			encoding = UTF8;
+
+		BufferedInputStream in = null;
+		try {
+			in = new BufferedInputStream(is, K2);
+			ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+			// detect encoding with the help of meta tag
+			try {
+				in.mark(K2 * 2);
+				String tmpEnc = detectCharset("charset=", output, in, encoding);
+				if (tmpEnc != null)
+					encoding = tmpEnc;
+				else {
+					Log.d(Constants.TAG, "no charset found in first stage");
+					// detect with the help of xml beginning ala
+					// encoding="charset"
+					tmpEnc = detectCharset("encoding=", output, in, encoding);
+					if (tmpEnc != null)
+						encoding = tmpEnc;
+					else
+						Log.d(Constants.TAG, "no charset found in second stage");
+				}
+
+				if (!Charset.isSupported(encoding))
+					throw new UnsupportedEncodingException(encoding);
+			} catch (UnsupportedEncodingException e) {
+				useDefaultEncoding(e);
+			} catch (IllegalArgumentException e) {
+				// Charset.isSupported rejects syntactically illegal names with
+				// the unchecked IllegalCharsetNameException; a broken charset
+				// declaration in a page must not abort the whole read
+				useDefaultEncoding(e);
+			}
+
+			// SocketException: Connection reset
+			// IOException: missing CR => problem on server (probably some xml
+			// character thing?)
+			// IOException: Premature EOF => socket unexpectly closed from
+			// server
+			int bytesRead = output.size();
+			byte[] arr = new byte[K2];
+			while (true) {
+				if (bytesRead >= maxBytes) {
+					Log.d(Constants.TAG, "Maxbyte of " + maxBytes
+							+ " exceeded! Maybe html is now broken but try it nevertheless. Url: "
+							+ url);
+					break;
+				}
+
+				int n = in.read(arr);
+				if (n < 0)
+					break;
+				bytesRead += n;
+				output.write(arr, 0, n);
+			}
+
+			return output.toString(encoding);
+		} catch (SocketTimeoutException e) {
+			Log.e(Constants.TAG, e.toString() + " url:" + url);
+		} catch (IOException e) {
+			Log.e(Constants.TAG, e.toString() + " url:" + url);
+		} finally {
+			if (in != null) {
+				try {
+					in.close();
+				} catch (Exception ignored) {
+					// best effort: the content was either read completely or
+					// the failure has already been logged above
+				}
+			}
+		}
+		return "";
+	}
+
+	/**
+	 * Logs why the detected encoding cannot be used and switches to UTF-8.
+	 */
+	private void useDefaultEncoding(Exception problem) {
+		Log.d(Constants.TAG,
+				"Using default encoding:" + UTF8 + " problem:" + problem.getMessage()
+						+ " encoding:" + encoding + " " + url);
+		encoding = UTF8;
+	}
+
+	/**
+	 * This method detects the charset even if the first call only returns some
+	 * bytes. It reads until at least 2K bytes ({@link #K2}) have been appended
+	 * to {@code bos} or the stream ends, decodes everything collected so far
+	 * with {@code enc} and looks for the value following {@code key}.
+	 * <p>
+	 * On success the input stream is reset to its mark and {@code bos} is
+	 * cleared so that the caller re-reads the content from the beginning. If
+	 * nothing usable is found, or the stream cannot be reset, the bytes read so
+	 * far stay in {@code bos}.
+	 * 
+	 * @param key
+	 *            the text preceding the charset name, e.g. {@code charset=}
+	 * @return the cleaned-up charset name or null if none was found
+	 * @throws IOException
+	 *             if reading fails or {@code enc} is not a supported encoding
+	 */
+	protected String detectCharset(String key, ByteArrayOutputStream bos, BufferedInputStream in,
+			String enc) throws IOException {
+
+		// Grab better encoding from stream
+		byte[] arr = new byte[K2];
+		int nSum = 0;
+		while (nSum < K2) {
+			int n = in.read(arr);
+			if (n < 0)
+				break;
+
+			nSum += n;
+			bos.write(arr, 0, n);
+		}
+
+		String str = bos.toString(enc);
+		int encIndex = str.indexOf(key);
+		int clength = key.length();
+		// nothing to parse if the key is missing or is the very last thing in
+		// the sniffed data (charAt below would run past the end)
+		if (encIndex < 0 || encIndex + clength >= str.length())
+			return null;
+
+		char startChar = str.charAt(encIndex + clength);
+		int lastEncIndex;
+		if (startChar == '\'')
+			// if we have charset='something'
+			lastEncIndex = str.indexOf("'", ++encIndex + clength);
+		else if (startChar == '\"')
+			// if we have charset="something"
+			lastEncIndex = str.indexOf("\"", ++encIndex + clength);
+		else {
+			// if we have "text/html; charset=utf-8"
+			int first = str.indexOf("\"", encIndex + clength);
+			if (first < 0)
+				first = Integer.MAX_VALUE;
+
+			// or "text/html; charset=utf-8 "
+			int sec = str.indexOf(" ", encIndex + clength);
+			if (sec < 0)
+				sec = Integer.MAX_VALUE;
+			lastEncIndex = Math.min(first, sec);
+
+			// or "text/html; charset=utf-8 '
+			int third = str.indexOf("'", encIndex + clength);
+			if (third > 0)
+				lastEncIndex = Math.min(lastEncIndex, third);
+		}
+
+		// re-read byte array with different encoding
+		// assume that the encoding string cannot be greater than 40 chars
+		if (lastEncIndex > encIndex + clength && lastEncIndex < encIndex + clength + 40) {
+			String tmpEnc = SHelper.encodingCleanup(str.substring(encIndex + clength,
+					lastEncIndex));
+			try {
+				in.reset();
+				bos.reset();
+				return tmpEnc;
+			} catch (IOException ex) {
+				Log.e(Constants.TAG, "Couldn't reset stream to re-read with new encoding "
+						+ tmpEnc + " " + ex.toString());
+			}
+		}
+		return null;
+	}
+}
--- a/src/acr/browser/lightning/Reading/HtmlFetcher.java
+++ b/src/acr/browser/lightning/Reading/HtmlFetcher.java
@ -0,0 +1,445 @@
+/*
+ *  Copyright 2011 Peter Karich 
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package acr.browser.lightning.Reading;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.Proxy;
+import java.net.URL;
+import java.util.LinkedHashSet;
+import java.util.Locale;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.Inflater;
+import java.util.zip.InflaterInputStream;
+
+import acr.browser.lightning.Constants;
+import android.util.Log;
+
+/**
+ * Class to fetch articles. This class is thread safe.
+ * <p>
+ * NOTE(review): the configuration setters are not synchronized; the
+ * thread-safety claim presumably assumes that an instance is configured before
+ * it is shared - confirm.
+ * 
+ * @author Peter Karich
+ */
+public class HtmlFetcher {
+
+	static {
+		SHelper.enableCookieMgmt();
+		SHelper.enableUserAgentOverwrite();
+		// NOTE(review): judging by its name this presumably relaxes SSL
+		// certificate checks, possibly for the whole process - confirm that
+		// this is acceptable before shipping.
+		SHelper.enableAnySSL();
+	}
+
+	/**
+	 * Command line helper: reads {@code urls.txt}, takes the first
+	 * double-quoted string of every line as url, fetches it and writes the html
+	 * to {@code <domain>.html} ({@code <domain>2.html} if the domain was seen
+	 * before). Lines without a quoted url are skipped.
+	 */
+	public static void main(String[] args) throws Exception {
+		BufferedReader reader = new BufferedReader(new FileReader("urls.txt"));
+		try {
+			String line = null;
+			Set<String> existing = new LinkedHashSet<String>();
+			while ((line = reader.readLine()) != null) {
+				int index1 = line.indexOf("\"");
+				int index2 = line.indexOf("\"", index1 + 1);
+				if (index1 < 0 || index2 < 0)
+					continue;
+
+				String url = line.substring(index1 + 1, index2);
+				String domainStr = SHelper.extractDomain(url, true);
+				String counterStr = "";
+				// TODO more similarities
+				if (existing.contains(domainStr))
+					counterStr = "2";
+				else
+					existing.add(domainStr);
+
+				String html = new HtmlFetcher().fetchAsString(url, 20000);
+				String outFile = domainStr + counterStr + ".html";
+				BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
+				try {
+					writer.write(html);
+				} finally {
+					writer.close();
+				}
+			}
+		} finally {
+			reader.close();
+		}
+	}
+
+	/** Domains of url shorteners whose redirect target is resolved once more. */
+	private static final String[] SHORTENER_DOMAINS = { "bit.ly", "cli.gs", "deck.ly", "fb.me",
+			"feedproxy.google.com", "flic.kr", "fur.ly", "goo.gl", "is.gd", "ink.co", "j.mp",
+			"lnkd.in", "on.fb.me", "ow.ly", "plurl.us", "sns.mx", "snurl.com", "su.pr", "t.co",
+			"tcrn.ch", "tl.gd", "tiny.cc", "tinyurl.com", "tmi.me", "tr.im", "twurl.nl" };
+
+	private String referrer = "https://github.com/karussell/snacktory";
+	private String userAgent = "Mozilla/5.0 (compatible; Snacktory; +" + referrer + ")";
+	private String cacheControl = "max-age=0";
+	private String language = "en-us";
+	private String accept = "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
+	private String charset = "UTF-8";
+	/** Optional result cache, null means no caching. */
+	private SCache cache;
+	/** Number of results served from the cache. */
+	private AtomicInteger cacheCounter = new AtomicInteger(0);
+	/** Maximum length of the extracted text, negative means unlimited. */
+	private int maxTextLength = -1;
+	private ArticleTextExtractor extractor = new ArticleTextExtractor();
+	private Set<String> furtherResolveNecessary = newShortenerSet();
+
+	/** Builds the per-instance set of shortener domains. */
+	private static Set<String> newShortenerSet() {
+		Set<String> domains = new LinkedHashSet<String>();
+		for (String domain : SHORTENER_DOMAINS)
+			domains.add(domain);
+		return domains;
+	}
+
+	public HtmlFetcher() {
+	}
+
+	public void setExtractor(ArticleTextExtractor extractor) {
+		this.extractor = extractor;
+	}
+
+	public ArticleTextExtractor getExtractor() {
+		return extractor;
+	}
+
+	public HtmlFetcher setCache(SCache cache) {
+		this.cache = cache;
+		return this;
+	}
+
+	public SCache getCache() {
+		return cache;
+	}
+
+	/** @return how many results were served from the cache so far */
+	public int getCacheCounter() {
+		return cacheCounter.get();
+	}
+
+	public HtmlFetcher clearCacheCounter() {
+		cacheCounter.set(0);
+		return this;
+	}
+
+	/**
+	 * @param maxTextLength
+	 *            maximum length of the extracted text, a negative value
+	 *            disables the limit
+	 */
+	public HtmlFetcher setMaxTextLength(int maxTextLength) {
+		this.maxTextLength = maxTextLength;
+		return this;
+	}
+
+	public int getMaxTextLength() {
+		return maxTextLength;
+	}
+
+	public void setAccept(String accept) {
+		this.accept = accept;
+	}
+
+	public void setCharset(String charset) {
+		this.charset = charset;
+	}
+
+	public void setCacheControl(String cacheControl) {
+		this.cacheControl = cacheControl;
+	}
+
+	public String getLanguage() {
+		return language;
+	}
+
+	public void setLanguage(String language) {
+		this.language = language;
+	}
+
+	public String getReferrer() {
+		return referrer;
+	}
+
+	public HtmlFetcher setReferrer(String referrer) {
+		this.referrer = referrer;
+		return this;
+	}
+
+	public String getUserAgent() {
+		return userAgent;
+	}
+
+	public void setUserAgent(String userAgent) {
+		this.userAgent = userAgent;
+	}
+
+	public String getAccept() {
+		return accept;
+	}
+
+	public String getCacheControl() {
+		return cacheControl;
+	}
+
+	public String getCharset() {
+		return charset;
+	}
+
+	/**
+	 * Fetches the page behind the url and extracts its article content.
+	 * <p>
+	 * Hashbangs and Google/Facebook redirect wrappers are removed from the url
+	 * first. A new result is put into the cache (if one is set) before the
+	 * content is fetched; threads waiting on that result object are notified
+	 * when this method finishes, also when it finishes with an exception.
+	 * 
+	 * @param url
+	 *            the url to fetch
+	 * @param timeout
+	 *            connect and read timeout in milliseconds
+	 * @param resolve
+	 *            whether redirects (e.g. of url shorteners) should be resolved
+	 *            before fetching
+	 * @return the extraction result, never null; it only carries the url if the
+	 *         url could not be resolved
+	 * @throws Exception
+	 *             if fetching or extracting fails
+	 */
+	public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
+		String originalUrl = url;
+		url = SHelper.removeHashbang(url);
+		String gUrl = SHelper.getUrlFromUglyGoogleRedirect(url);
+		if (gUrl != null)
+			url = gUrl;
+		else {
+			gUrl = SHelper.getUrlFromUglyFacebookRedirect(url);
+			if (gUrl != null)
+				url = gUrl;
+		}
+
+		if (resolve) {
+			// check if we can avoid resolving the URL (which hits the website!)
+			JResult res = getFromCache(url, originalUrl);
+			if (res != null)
+				return res;
+
+			String resUrl = getResolvedUrl(url, timeout);
+			// null is tolerated as well because getResolvedUrl can be
+			// overridden
+			if (resUrl == null || resUrl.isEmpty()) {
+				Log.d(Constants.TAG, "resolved url is empty. Url is: " + url);
+
+				JResult result = new JResult();
+				if (cache != null)
+					cache.put(url, result);
+				return result.setUrl(url);
+			}
+
+			// if resolved url is longer then use it!
+			if (resUrl.trim().length() > url.length()) {
+				// this is necessary e.g. for some homebaken url resolvers which
+				// return
+				// the resolved url relative to url!
+				url = SHelper.useDomainOfFirstArg4Second(url, resUrl);
+			}
+		}
+
+		// check if we have the (resolved) URL in cache
+		JResult res = getFromCache(url, originalUrl);
+		if (res != null)
+			return res;
+
+		JResult result = new JResult();
+		// or should we use? <link rel="canonical"
+		// href="http://www.N24.de/news/newsitem_6797232.html"/>
+		result.setUrl(url);
+		result.setOriginalUrl(originalUrl);
+		result.setDate(SHelper.estimateDate(url));
+
+		// Immediately put the url into the cache as extracting content takes
+		// time.
+		if (cache != null) {
+			cache.put(originalUrl, result);
+			cache.put(url, result);
+		}
+
+		try {
+			// Locale.ROOT keeps the ASCII comparison of the url independent of
+			// the device locale (e.g. Turkish lower-cases "I" to a dotless i)
+			String lowerUrl = url.toLowerCase(Locale.ROOT);
+			if (SHelper.isDoc(lowerUrl) || SHelper.isApp(lowerUrl) || SHelper.isPackage(lowerUrl)) {
+				// skip
+			} else if (SHelper.isVideo(lowerUrl) || SHelper.isAudio(lowerUrl)) {
+				result.setVideoUrl(url);
+			} else if (SHelper.isImage(lowerUrl)) {
+				result.setImageUrl(url);
+			} else {
+				extractor.extractContent(result, fetchAsString(url, timeout));
+				if (result.getFaviconUrl().isEmpty())
+					result.setFaviconUrl(SHelper.getDefaultFavicon(url));
+
+				// some links are relative to root and do not include the domain
+				// of the url :(
+				result.setFaviconUrl(fixUrl(url, result.getFaviconUrl()));
+				result.setImageUrl(fixUrl(url, result.getImageUrl()));
+				result.setVideoUrl(fixUrl(url, result.getVideoUrl()));
+				result.setRssUrl(fixUrl(url, result.getRssUrl()));
+			}
+			result.setText(lessText(result.getText()));
+		} finally {
+			// the result is already visible through the cache; wake up waiting
+			// threads even if fetching failed so that they cannot hang forever
+			synchronized (result) {
+				result.notifyAll();
+			}
+		}
+		return result;
+	}
+
+	/**
+	 * Cuts the text down to the configured maximum text length.
+	 * 
+	 * @return an empty string for null, the text itself if no limit is set or
+	 *         the text is short enough, otherwise its first maxTextLength
+	 *         characters
+	 */
+	public String lessText(String text) {
+		if (text == null)
+			return "";
+
+		if (maxTextLength >= 0 && text.length() > maxTextLength)
+			return text.substring(0, maxTextLength);
+
+		return text;
+	}
+
+	private static String fixUrl(String url, String urlOrPath) {
+		return SHelper.useDomainOfFirstArg4Second(url, urlOrPath);
+	}
+
+	/**
+	 * Same as {@link #fetchAsString(String, int, boolean)} with the additional
+	 * request headers enabled.
+	 */
+	public String fetchAsString(String urlAsString, int timeout) throws MalformedURLException,
+			IOException {
+		return fetchAsString(urlAsString, timeout, true);
+	}
+
+	/**
+	 * Downloads the url (following redirects) and returns the body as string.
+	 * gzip and deflate encoded responses are decompressed; the charset is taken
+	 * from the Content-Type header and refined by the {@link Converter}.
+	 * 
+	 * @param timeout
+	 *            connect and read timeout in milliseconds
+	 * @param includeSomeGooseOptions
+	 *            whether the language, charset, referer and cache-control
+	 *            headers are sent too
+	 */
+	public String fetchAsString(String urlAsString, int timeout, boolean includeSomeGooseOptions)
+			throws MalformedURLException, IOException {
+		HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, includeSomeGooseOptions);
+		hConn.setInstanceFollowRedirects(true);
+		String encoding = hConn.getContentEncoding();
+		InputStream is;
+		if (encoding != null && encoding.equalsIgnoreCase("gzip")) {
+			is = new GZIPInputStream(hConn.getInputStream());
+		} else if (encoding != null && encoding.equalsIgnoreCase("deflate")) {
+			is = new InflaterInputStream(hConn.getInputStream(), new Inflater(true));
+		} else {
+			is = hConn.getInputStream();
+		}
+
+		String enc = Converter.extractEncoding(hConn.getContentType());
+		// the converter closes the stream when it is done
+		String res = createConverter(urlAsString).streamToString(is, enc);
+		Log.d(Constants.TAG, res.length() + " FetchAsString:" + urlAsString);
+		return res;
+	}
+
+	public Converter createConverter(String url) {
+		return new Converter(url);
+	}
+
+	/**
+	 * On some devices we have to hack:
+	 * http://developers.sun.com/mobility/reference
+	 * /techart/design_guidelines/http_redirection.html
+	 * <p>
+	 * Sends a HEAD request without following redirects. If the redirect target
+	 * belongs to a known url shortener it is resolved again.
+	 * NOTE(review): there is no limit on the number of such hops - confirm that
+	 * shorteners redirecting to each other cannot loop.
+	 * 
+	 * @param timeout
+	 *            Sets a specified timeout value, in milliseconds
+	 * @return the resolved url for a 3xx response with a Location header, the
+	 *         same url for any other response, or an empty string if the url
+	 *         could not be resolved because of an exception
+	 */
+	public String getResolvedUrl(String urlAsString, int timeout) {
+		String newUrl = null;
+		int responseCode = -1;
+		try {
+			HttpURLConnection hConn = createUrlConnection(urlAsString, timeout, true);
+			// force no follow
+			hConn.setInstanceFollowRedirects(false);
+			// the program doesn't care what the content actually is !!
+			// http://java.sun.com/developer/JDCTechTips/2003/tt0422.html
+			hConn.setRequestMethod("HEAD");
+			hConn.connect();
+			responseCode = hConn.getResponseCode();
+			hConn.getInputStream().close();
+			if (responseCode == HttpURLConnection.HTTP_OK)
+				return urlAsString;
+
+			newUrl = hConn.getHeaderField("Location");
+			if (responseCode / 100 == 3 && newUrl != null) {
+				newUrl = newUrl.replaceAll(" ", "+");
+				// some services use (none-standard) utf8 in their location
+				// header
+				if (urlAsString.startsWith("http://bit.ly")
+						|| urlAsString.startsWith("http://is.gd"))
+					newUrl = encodeUriFromHeader(newUrl);
+
+				// fix problems if shortened twice. as it is often the case
+				// after twitters' t.co bullshit
+				if (furtherResolveNecessary.contains(SHelper.extractDomain(newUrl, true)))
+					newUrl = getResolvedUrl(newUrl, timeout);
+
+				return newUrl;
+			} else
+				return urlAsString;
+
+		} catch (Exception ex) {
+			Log.e(Constants.TAG, "getResolvedUrl:" + urlAsString + " Error:" + ex.getMessage());
+			return "";
+		} finally {
+			// plain trace output for every call, so debug instead of error
+			Log.d(Constants.TAG, responseCode + " url:" + urlAsString + " resolved:" + newUrl);
+		}
+	}
+
+	/**
+	 * Takes a URI that was decoded as ISO-8859-1 and applies percent-encoding
+	 * to non-ASCII characters. Workaround for broken origin servers that send
+	 * UTF-8 in the Location: header.
+	 */
+	static String encodeUriFromHeader(String badLocation) {
+		StringBuilder sb = new StringBuilder();
+
+		for (char ch : badLocation.toCharArray()) {
+			if (ch < (char) 128) {
+				sb.append(ch);
+			} else {
+				// this is ONLY valid if the uri was decoded using ISO-8859-1
+				sb.append(String.format("%%%02X", (int) ch));
+			}
+		}
+
+		return sb.toString();
+	}
+
+	/**
+	 * Creates a connection for the url with the configured request headers and
+	 * the given connect/read timeout. The connection is not opened yet.
+	 * 
+	 * @param includeSomeGooseOptions
+	 *            whether the language, charset, referer and cache-control
+	 *            headers are set too
+	 */
+	protected HttpURLConnection createUrlConnection(String urlAsStr, int timeout,
+			boolean includeSomeGooseOptions) throws MalformedURLException, IOException {
+		URL url = new URL(urlAsStr);
+		// using proxy may increase latency
+		// NOTE(review): NO_PROXY also bypasses a proxy configured by the user -
+		// confirm that this is intended.
+		HttpURLConnection hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
+		hConn.setRequestProperty("User-Agent", userAgent);
+		hConn.setRequestProperty("Accept", accept);
+
+		if (includeSomeGooseOptions) {
+			hConn.setRequestProperty("Accept-Language", language);
+			hConn.setRequestProperty("content-charset", charset);
+			hConn.addRequestProperty("Referer", referrer);
+			// avoid the cache for testing purposes only?
+			hConn.setRequestProperty("Cache-Control", cacheControl);
+		}
+
+		// suggest respond to be gzipped or deflated (which is just another
+		// compression)
+		// http://stackoverflow.com/q/3932117
+		hConn.setRequestProperty("Accept-Encoding", "gzip, deflate");
+		hConn.setConnectTimeout(timeout);
+		hConn.setReadTimeout(timeout);
+		return hConn;
+	}
+
+	/**
+	 * Looks the url up in the cache. A hit gets the given url and original url
+	 * assigned and increments the cache counter.
+	 * 
+	 * @return the cached result, or null if there is no cache or no entry
+	 */
+	private JResult getFromCache(String url, String originalUrl) throws Exception {
+		if (cache != null) {
+			JResult res = cache.get(url);
+			if (res != null) {
+				// e.g. the cache returned a shortened url as original url now
+				// we want to store the
+				// current original url! Also it can be that the cache response
+				// to url but the JResult
+				// does not contain it so overwrite it:
+				res.setUrl(url);
+				res.setOriginalUrl(originalUrl);
+				cacheCounter.addAndGet(1);
+				return res;
+			}
+		}
+		return null;
+	}
+}
--- a/src/acr/browser/lightning/Reading/ImageResult.java
+++ b/src/acr/browser/lightning/Reading/ImageResult.java
@ -0,0 +1,31 @@
+package acr.browser.lightning.Reading;
+
+import org.jsoup.nodes.Element;
+
+/**
+ * Plain data holder for one image found under an element. All fields are
+ * public and mutable.
+ * 
+ * @author Chris Alexander, chris@chris-alexander.co.uk
+ */
+public class ImageResult {
+
+	/** Source url of the image. */
+	public String src;
+	/** Ranking weight; ImageComparator sorts higher weights first. */
+	public Integer weight;
+	public String title;
+	public int height;
+	public int width;
+	public String alt;
+	public boolean noFollow;
+	/** Not set by the constructor; stays null until assigned by the caller. */
+	public Element element;
+
+	/**
+	 * Stores the given values in the fields of the same name; {@link #element}
+	 * is left untouched.
+	 */
+	public ImageResult(String src, Integer weight, String title, int height, int width, String alt,
+			boolean noFollow) {
+		// textual attributes
+		this.src = src;
+		this.title = title;
+		this.alt = alt;
+		// dimensions
+		this.width = width;
+		this.height = height;
+		// ranking information
+		this.weight = weight;
+		this.noFollow = noFollow;
+	}
+}
--- a/src/acr/browser/lightning/Reading/JResult.java
+++ b/src/acr/browser/lightning/Reading/JResult.java
@ -0,0 +1,216 @@
+/*
+ *  Copyright 2011 Peter Karich 
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package acr.browser.lightning.Reading;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Parsed result from web page containing important title, text and image.
+ * <p>
+ * Most String getters never return null but an empty string if the value has
+ * not been set. The exceptions are {@link #getOriginalUrl()},
+ * {@link #getCanonicalUrl()}, {@link #getDate()} and {@link #getKeywords()},
+ * which return the stored value as is.
+ * 
+ * @author Peter Karich
+ */
+public class JResult implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+
+	private String title;
+	private String url;
+	private String originalUrl;
+	private String canonicalUrl;
+	private String imageUrl;
+	private String videoUrl;
+	private String rssUrl;
+	private String text;
+	private String faviconUrl;
+	private String description;
+	private String dateString;
+	private List<String> textList;
+	private Collection<String> keywords;
+	// ImageResult is not Serializable, so the images cannot be written out
+	// together with the rest of the result
+	private transient List<ImageResult> images = null;
+
+	public JResult() {
+	}
+
+	public String getUrl() {
+		return url == null ? "" : url;
+	}
+
+	public JResult setUrl(String url) {
+		this.url = url;
+		return this;
+	}
+
+	public JResult setOriginalUrl(String originalUrl) {
+		this.originalUrl = originalUrl;
+		return this;
+	}
+
+	/** @return the original url, null if it has not been set */
+	public String getOriginalUrl() {
+		return originalUrl;
+	}
+
+	public JResult setCanonicalUrl(String canonicalUrl) {
+		this.canonicalUrl = canonicalUrl;
+		return this;
+	}
+
+	/** @return the canonical url, null if it has not been set */
+	public String getCanonicalUrl() {
+		return canonicalUrl;
+	}
+
+	public String getFaviconUrl() {
+		return faviconUrl == null ? "" : faviconUrl;
+	}
+
+	public JResult setFaviconUrl(String faviconUrl) {
+		this.faviconUrl = faviconUrl;
+		return this;
+	}
+
+	public JResult setRssUrl(String rssUrl) {
+		this.rssUrl = rssUrl;
+		return this;
+	}
+
+	public String getRssUrl() {
+		return rssUrl == null ? "" : rssUrl;
+	}
+
+	public String getDescription() {
+		return description == null ? "" : description;
+	}
+
+	public JResult setDescription(String description) {
+		this.description = description;
+		return this;
+	}
+
+	public String getImageUrl() {
+		return imageUrl == null ? "" : imageUrl;
+	}
+
+	public JResult setImageUrl(String imageUrl) {
+		this.imageUrl = imageUrl;
+		return this;
+	}
+
+	public String getText() {
+		return text == null ? "" : text;
+	}
+
+	public JResult setText(String text) {
+		this.text = text;
+		return this;
+	}
+
+	/**
+	 * @return the text list, or a new empty list (not stored in this result)
+	 *         if none has been set
+	 */
+	public List<String> getTextList() {
+		if (this.textList == null)
+			return new ArrayList<String>();
+		return this.textList;
+	}
+
+	public JResult setTextList(List<String> textList) {
+		this.textList = textList;
+		return this;
+	}
+
+	public String getTitle() {
+		return title == null ? "" : title;
+	}
+
+	public JResult setTitle(String title) {
+		this.title = title;
+		return this;
+	}
+
+	public String getVideoUrl() {
+		return videoUrl == null ? "" : videoUrl;
+	}
+
+	public JResult setVideoUrl(String videoUrl) {
+		this.videoUrl = videoUrl;
+		return this;
+	}
+
+	public JResult setDate(String date) {
+		this.dateString = date;
+		return this;
+	}
+
+	/** @return the keywords, null if they have not been set */
+	public Collection<String> getKeywords() {
+		return keywords;
+	}
+
+	public void setKeywords(Collection<String> keywords) {
+		this.keywords = keywords;
+	}
+
+	/**
+	 * @return get date from url or guessed from text
+	 */
+	public String getDate() {
+		return dateString;
+	}
+
+	/**
+	 * @return images list, an immutable empty list if none has been set
+	 */
+	public List<ImageResult> getImages() {
+		if (images == null)
+			return Collections.emptyList();
+		return images;
+	}
+
+	/**
+	 * @return images count
+	 */
+	public int getImagesCount() {
+		if (images == null)
+			return 0;
+		return images.size();
+	}
+
+	/**
+	 * set images list
+	 */
+	public void setImages(List<ImageResult> images) {
+		this.images = images;
+	}
+
+	@Override
+	public String toString() {
+		// use the null-safe getter for the text as well, like title and image
+		return "title:" + getTitle() + " imageUrl:" + getImageUrl() + " text:" + getText();
+	}
+}
--- a/src/acr/browser/lightning/Reading/MapEntry.java
+++ b/src/acr/browser/lightning/Reading/MapEntry.java
@ -0,0 +1,80 @@
+/**
+ * Copyright (C) 2010 Peter Karich <>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package acr.browser.lightning.Reading;
+
+import java.io.Serializable;
+import java.util.Map;
+
+/**
+ * Simple impl of Map.Entry. So that we can have ordered maps.
+ * <p>
+ * {@link #setValue(Object)}, {@link #equals(Object)} and {@link #hashCode()}
+ * follow the contract documented on {@link java.util.Map.Entry}, so instances
+ * compare equal to entries of other implementations holding the same key and
+ * value.
+ * 
+ * @author Peter Karich, peat_hal ‘at’ users ‘dot’ sourceforge ‘dot’
+ *         net
+ */
+public class MapEntry<K, V> implements Map.Entry<K, V>, Serializable {
+
+	private static final long serialVersionUID = 1L;
+	private final K key;
+	private V value;
+
+	/**
+	 * @param key
+	 *            the key of the entry, may be null
+	 * @param value
+	 *            the initial value of the entry, may be null
+	 */
+	public MapEntry(K key, V value) {
+		this.key = key;
+		this.value = value;
+	}
+
+	@Override
+	public K getKey() {
+		return key;
+	}
+
+	@Override
+	public V getValue() {
+		return value;
+	}
+
+	/**
+	 * Replaces the value of this entry.
+	 * 
+	 * @return the previous value, as required by the Map.Entry contract
+	 */
+	@Override
+	public V setValue(V value) {
+		final V previous = this.value;
+		this.value = value;
+		return previous;
+	}
+
+	@Override
+	public String toString() {
+		return getKey() + ", " + getValue();
+	}
+
+	/**
+	 * @return true if obj is a Map.Entry (of any implementation) with an equal
+	 *         key and an equal value
+	 */
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (!(obj instanceof Map.Entry))
+			return false;
+		final Map.Entry<?, ?> other = (Map.Entry<?, ?>) obj;
+		return sameOrEqual(key, other.getKey()) && sameOrEqual(value, other.getValue());
+	}
+
+	/**
+	 * @return the xor of the hash codes of key and value (0 for null), as
+	 *         specified by Map.Entry
+	 */
+	@Override
+	public int hashCode() {
+		return (key == null ? 0 : key.hashCode()) ^ (value == null ? 0 : value.hashCode());
+	}
+
+	/** Null-safe equality check. */
+	private static boolean sameOrEqual(Object a, Object b) {
+		return a == null ? b == null : a.equals(b);
+	}
+}
--- a/src/acr/browser/lightning/Reading/OutputFormatter.java
+++ b/src/acr/browser/lightning/Reading/OutputFormatter.java
@ -0,0 +1,174 @@
+package acr.browser.lightning.Reading;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
+/**
+ * This class is responsible for taking our top node and stripping out junk we
+ * don't want and getting it ready for how we want it presented to the user.
+ * 
+ * @author goose | jim
+ * @author karussell
+ */
+public class OutputFormatter {
+
+	public static final int MIN_PARAGRAPH_TEXT = 50;
+	private static final List<String> NODES_TO_REPLACE = Arrays.asList("strong", "b", "i");
+	/** Searched for in the style and class attributes to detect hidden nodes. */
+	private Pattern unlikelyPattern = Pattern.compile("display\\:none|visibility\\:hidden");
+	/** Texts shorter than this number of characters are dropped. */
+	protected final int minParagraphText;
+	protected final List<String> nodesToReplace;
+	/** CSS selector of the elements whose text ends up in the output. */
+	protected String nodesToKeepCssSelector = "p";
+
+	public OutputFormatter() {
+		this(MIN_PARAGRAPH_TEXT, NODES_TO_REPLACE);
+	}
+
+	public OutputFormatter(int minParagraphText) {
+		this(minParagraphText, NODES_TO_REPLACE);
+	}
+
+	public OutputFormatter(int minParagraphText, List<String> nodesToReplace) {
+		this.minParagraphText = minParagraphText;
+		this.nodesToReplace = nodesToReplace;
+	}
+
+	/**
+	 * set elements to keep in output text
+	 */
+	public void setNodesToKeepCssSelector(String nodesToKeepCssSelector) {
+		this.nodesToKeepCssSelector = nodesToKeepCssSelector;
+	}
+
+	/**
+	 * takes an element and turns the P tags into \n\n
+	 * <p>
+	 * Scored child nodes that are negative or too short are removed from
+	 * {@code topNode} first. If the collected paragraphs are at most 100
+	 * characters long the result is passed through jsoup once more, and the
+	 * complete text of the node is used instead when no paragraph text was
+	 * found or it is not longer than the node's own text.
+	 */
+	public String getFormattedText(Element topNode) {
+		removeNodesWithNegativeScores(topNode);
+		StringBuilder sb = new StringBuilder();
+		append(topNode, sb, nodesToKeepCssSelector);
+		String str = SHelper.innerTrim(sb.toString());
+		if (str.length() > 100)
+			return str;
+
+		// no subelements
+		if (str.isEmpty()
+				|| (!topNode.text().isEmpty() && str.length() <= topNode.ownText().length()))
+			str = topNode.text();
+
+		// if jsoup failed to parse the whole html now parse this smaller
+		// snippet again to avoid html tags disturbing our text:
+		return Jsoup.parse(str).text();
+	}
+
+	/**
+	 * Takes an element and returns a list of texts extracted from the P tags
+	 */
+	public List<String> getTextList(Element topNode) {
+		List<String> texts = new ArrayList<String>();
+		for (Element element : topNode.select(this.nodesToKeepCssSelector)) {
+			if (element.hasText()) {
+				texts.add(element.text());
+			}
+		}
+		return texts;
+	}
+
+	/**
+	 * If there are elements inside our top node that have a negative gravity
+	 * score remove them. Scored elements whose text is shorter than the
+	 * minimum paragraph length are removed as well. Elements whose gravityScore
+	 * attribute is not a number are left untouched.
+	 */
+	protected void removeNodesWithNegativeScores(Element topNode) {
+		Elements gravityItems = topNode.select("*[gravityScore]");
+		for (Element item : gravityItems) {
+			int score;
+			try {
+				score = Integer.parseInt(item.attr("gravityScore"));
+			} catch (NumberFormatException ex) {
+				// the attribute is part of the parsed page and so not under our
+				// control; a malformed value must not abort the formatting
+				continue;
+			}
+			if (score < 0 || item.text().length() < minParagraphText)
+				item.remove();
+		}
+	}
+
+	/**
+	 * Appends the text of every element below {@code node} that matches the
+	 * selector, followed by a blank line. Elements are skipped if they or one
+	 * of their ancestors up to {@code node} are {@link #unlikely(Node)}, if
+	 * their text is shorter than the minimum paragraph length, or if the text
+	 * is more than twice as long as its letter count reported by
+	 * {@code SHelper.countLetters}.
+	 * 
+	 * @param tagName
+	 *            CSS selector of the elements to append
+	 */
+	protected void append(Element node, StringBuilder sb, String tagName) {
+		// is select more costly then getElementsByTag?
+		MAIN: for (Element e : node.select(tagName)) {
+			Element tmpEl = e;
+			// check all elements until 'node'
+			while (tmpEl != null && !tmpEl.equals(node)) {
+				if (unlikely(tmpEl))
+					continue MAIN;
+				tmpEl = tmpEl.parent();
+			}
+
+			String text = node2Text(e);
+			if (text.isEmpty() || text.length() < minParagraphText
+					|| text.length() > SHelper.countLetters(text) * 2)
+				continue;
+
+			sb.append(text);
+			sb.append("\n\n");
+		}
+	}
+
+	/**
+	 * @return true if the node should not contribute to the output: its class
+	 *         contains "caption" (ignoring case) or its style or class
+	 *         attribute matches the unlikely pattern
+	 */
+	boolean unlikely(Node e) {
+		// attr returns an empty string for a missing attribute
+		String clazz = e.attr("class");
+		// Locale.ROOT keeps the ASCII comparison independent of the device
+		// locale (e.g. Turkish lower-cases "I" to a dotless i)
+		if (clazz.toLowerCase(Locale.ROOT).contains("caption"))
+			return true;
+
+		String style = e.attr("style");
+		return unlikelyPattern.matcher(style).find() || unlikelyPattern.matcher(clazz).find();
+	}
+
+	/**
+	 * Recursively appends the text below {@code e} to {@code accum}, leaving
+	 * out child nodes that are {@link #unlikely(Node)}. A space is inserted in
+	 * front of block elements (unless the text collected so far is empty or
+	 * already ends with whitespace) and for br elements.
+	 */
+	void appendTextSkipHidden(Element e, StringBuilder accum) {
+		for (Node child : e.childNodes()) {
+			if (unlikely(child))
+				continue;
+			if (child instanceof TextNode) {
+				TextNode textNode = (TextNode) child;
+				String txt = textNode.text();
+				accum.append(txt);
+			} else if (child instanceof Element) {
+				Element element = (Element) child;
+				if (accum.length() > 0 && element.isBlock() && !lastCharIsWhitespace(accum))
+					accum.append(" ");
+				else if (element.tagName().equals("br"))
+					accum.append(" ");
+				appendTextSkipHidden(element, accum);
+			}
+		}
+	}
+
+	boolean lastCharIsWhitespace(StringBuilder accum) {
+		if (accum.length() == 0)
+			return false;
+		return Character.isWhitespace(accum.charAt(accum.length() - 1));
+	}
+
+	/** Plain jsoup text of the element, including hidden children. */
+	protected String node2TextOld(Element el) {
+		return el.text();
+	}
+
+	/** Text of the element without the text of unlikely (hidden) children. */
+	protected String node2Text(Element el) {
+		StringBuilder sb = new StringBuilder(200);
+		appendTextSkipHidden(el, sb);
+		return sb.toString();
+	}
+
+	/**
+	 * Replaces the pattern that marks nodes as unlikely.
+	 * 
+	 * @param unlikelyPattern
+	 *            regular expression searched for in style and class attributes
+	 * @return this instance, for chaining
+	 */
+	public OutputFormatter setUnlikelyPattern(String unlikelyPattern) {
+		this.unlikelyPattern = Pattern.compile(unlikelyPattern);
+		return this;
+	}
+
+	/**
+	 * Adds an alternative to the current unlikely pattern.
+	 * 
+	 * @return this instance, for chaining
+	 */
+	public OutputFormatter appendUnlikelyPattern(String str) {
+		return setUnlikelyPattern(unlikelyPattern.toString() + "|" + str);
+	}
+}
--- a/src/acr/browser/lightning/Reading/SCache.java
+++ b/src/acr/browser/lightning/Reading/SCache.java
@ -0,0 +1,29 @@
+/*
+ *  Copyright 2011 Peter Karich 
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package acr.browser.lightning.Reading;
+
+/**
+ * Contract for a store of {@link JResult} objects addressed by a URL string.
+ * <p>
+ * NOTE(review): the semantics below are inferred from the method signatures
+ * only; no implementation is visible here - confirm against the
+ * implementing classes.
+ * 
+ * @author Peter Karich
+ */
+public interface SCache {
+
+	/**
+	 * Looks up the result stored for {@code url}.
+	 * 
+	 * @param url
+	 *            the key to look up
+	 * @return the stored result; presumably {@code null} when nothing is
+	 *         stored for the key - TODO confirm
+	 */
+	JResult get(String url);
+
+	/**
+	 * Stores {@code res} under the key {@code url}.
+	 * 
+	 * @param url
+	 *            the key
+	 * @param res
+	 *            the result to store
+	 */
+	void put(String url, JResult res);
+
+	/**
+	 * @return presumably the number of entries currently held - TODO confirm
+	 */
+	int getSize();
+}
--- a/src/acr/browser/lightning/Reading/SHelper.java
+++ b/src/acr/browser/lightning/Reading/SHelper.java
@ -0,0 +1,480 @@
+/*
+ *  Copyright 2011 Peter Karich 
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package acr.browser.lightning.Reading;
+
+import java.io.UnsupportedEncodingException;
+import java.net.CookieHandler;
+import java.net.CookieManager;
+import java.net.CookiePolicy;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.security.SecureRandom;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.net.ssl.KeyManager;
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+import org.jsoup.nodes.Element;
+
+/**
+ * 
+ * @author Peter Karich
+ */
+public class SHelper {
+
+	public static final String UTF8 = "UTF-8";
+	private static final Pattern SPACE = Pattern.compile(" ");
+
+	/**
+	 * Trims the given url and percent-encodes every remaining space
+	 * character as {@code %20}. An empty string is returned unchanged.
+	 * 
+	 * @param url
+	 *            the url to clean, must not be null
+	 * @return the trimmed url without literal spaces
+	 */
+	public static String replaceSpaces(String url) {
+		if (url.isEmpty())
+			return url;
+
+		String trimmed = url.trim();
+		return trimmed.replace(" ", "%20");
+	}
+
+	/**
+	 * Counts the non-overlapping occurrences of {@code substring} in
+	 * {@code str}, scanning from left to right.
+	 * <p>
+	 * Implemented iteratively: the former recursive version never terminated
+	 * for an empty {@code substring} and copied the remaining string on
+	 * every match.
+	 * 
+	 * @param str
+	 *            the text to search, must not be null
+	 * @param substring
+	 *            the text to look for, must not be null
+	 * @return the number of occurrences; 0 when {@code substring} is empty
+	 */
+	public static int count(String str, String substring) {
+		// an empty needle matches everywhere without consuming input
+		if (substring.isEmpty())
+			return 0;
+
+		int occurrences = 0;
+		int from = str.indexOf(substring);
+		while (from >= 0) {
+			occurrences++;
+			// continue behind the match so that matches never overlap
+			from = str.indexOf(substring, from + substring.length());
+		}
+		return occurrences;
+	}
+
+	/**
+	 * Collapses every run of spaces, tabs and newlines into a single space
+	 * and trims the result.
+	 * 
+	 * @param str
+	 *            the text to normalise, must not be null
+	 * @return the normalised text, "" for empty input
+	 */
+	public static String innerTrim(String str) {
+		if (str.isEmpty())
+			return "";
+
+		StringBuilder collapsed = new StringBuilder();
+		boolean pendingSpace = false;
+		for (char c : str.toCharArray()) {
+			boolean blank = c == ' ' || c == '\t' || c == '\n';
+			if (blank) {
+				pendingSpace = true;
+			} else {
+				if (pendingSpace) {
+					collapsed.append(' ');
+					pendingSpace = false;
+				}
+				collapsed.append(c);
+			}
+		}
+		return collapsed.toString().trim();
+	}
+
+	/**
+	 * Extracts the first run of encoding-name characters (letters, digits,
+	 * '-' and '_') from {@code str}: leading characters of any other kind
+	 * are skipped and the run ends at the first such character after it.
+	 * 
+	 * @param str
+	 *            raw text containing an encoding name, must not be null
+	 * @return the extracted run, "" if there is none
+	 */
+	public static String encodingCleanup(String str) {
+		int length = str.length();
+
+		int start = 0;
+		while (start < length && !isEncodingNameChar(str.charAt(start)))
+			start++;
+
+		int end = start;
+		while (end < length && isEncodingNameChar(str.charAt(end)))
+			end++;
+
+		return str.substring(start, end).trim();
+	}
+
+	/** True for the characters accepted as part of an encoding name. */
+	private static boolean isEncodingNameChar(char c) {
+		return Character.isDigit(c) || Character.isLetter(c) || c == '-' || c == '_';
+	}
+
+	/**
+	 * Returns the longest common substring of the two arguments, taken from
+	 * {@code str1} using the index pair computed by {@code longestSubstring}.
+	 * 
+	 * @return the common substring, or "" when {@code longestSubstring}
+	 *         yields no result or an empty range
+	 */
+	public static String getLongestSubstring(String str1, String str2) {
+		int[] range = longestSubstring(str1, str2);
+		boolean hasMatch = range != null && range[0] < range[1];
+		return hasMatch ? str1.substring(range[0], range[1]) : "";
+	}
+
+	/**
+	 * Finds the longest common substring of two strings.
+	 * <p>
+	 * Dynamic programming over the lengths of common suffixes: the entry for
+	 * (i, j) is one more than the entry for (i-1, j-1) when the characters
+	 * match and 0 otherwise. Only the previous row is ever read, so two
+	 * rolling rows are kept instead of a full {@code len1 x len2} matrix,
+	 * which could exhaust the heap for page-sized inputs.
+	 * 
+	 * @param str1
+	 *            first string; the returned indices refer to it
+	 * @param str2
+	 *            second string
+	 * @return {@code {begin, end}} such that
+	 *         {@code str1.substring(begin, end)} is the first longest common
+	 *         substring, {@code {0, 0}} when nothing is shared, or
+	 *         {@code null} when either argument is null or empty
+	 */
+	public static int[] longestSubstring(String str1, String str2) {
+		if (str1 == null || str1.isEmpty() || str2 == null || str2.isEmpty())
+			return null;
+
+		int len1 = str1.length();
+		int len2 = str2.length();
+		// java initializes both rows with 0
+		int[] previousRow = new int[len2];
+		int[] currentRow = new int[len2];
+		int maxlen = 0;
+		int lastSubstrBegin = 0;
+		int endIndex = 0;
+		for (int i = 0; i < len1; i++) {
+			char c1 = str1.charAt(i);
+			for (int j = 0; j < len2; j++) {
+				if (c1 != str2.charAt(j)) {
+					// the row is reused, so stale values must be cleared
+					currentRow[j] = 0;
+					continue;
+				}
+
+				int run = (i == 0 || j == 0) ? 1 : 1 + previousRow[j - 1];
+				currentRow[j] = run;
+				if (run > maxlen) {
+					maxlen = run;
+					// generate substring from str1 => i
+					lastSubstrBegin = i - run + 1;
+					endIndex = i + 1;
+				}
+			}
+			int[] swap = previousRow;
+			previousRow = currentRow;
+			currentRow = swap;
+		}
+		return new int[] { lastSubstrBegin, endIndex };
+	}
+
+	/**
+	 * Builds the conventional {@code /favicon.ico} location for the site
+	 * the given url belongs to.
+	 * 
+	 * @param url
+	 *            any url of the site
+	 * @return the result of resolving "/favicon.ico" against {@code url}
+	 */
+	public static String getDefaultFavicon(String url) {
+		String defaultIconPath = "/favicon.ico";
+		return useDomainOfFirstArg4Second(url, defaultIconPath);
+	}
+
+	/**
+	 * Resolves a (possibly relative) path against the site of another url.
+	 * <ul>
+	 * <li>paths starting with "http" are returned unchanged</li>
+	 * <li>"favicon.ico" is treated like "/favicon.ico"</li>
+	 * <li>protocol-relative paths ("//host/x") and host-relative paths
+	 * ("/x") receive the scheme of {@code urlForDomain}: https when it
+	 * starts with "https:", http otherwise</li>
+	 * <li>paths starting with "../" are appended to {@code urlForDomain} cut
+	 * after its last slash</li>
+	 * <li>everything else is returned unchanged</li>
+	 * </ul>
+	 * 
+	 * @param urlForDomain
+	 *            extract the domain from this url
+	 * @param path
+	 *            this url does not have a domain
+	 * @return the resolved url
+	 */
+	public static String useDomainOfFirstArg4Second(String urlForDomain, String path) {
+		if (path.startsWith("http"))
+			return path;
+
+		if ("favicon.ico".equals(path))
+			path = "/favicon.ico";
+
+		// keep the scheme of the page: a host-relative resource of an https
+		// page must not be downgraded to http
+		String scheme = urlForDomain.startsWith("https:") ? "https:" : "http:";
+
+		if (path.startsWith("//")) {
+			// wikipedia special case, see tests
+			return scheme + path;
+		} else if (path.startsWith("/")) {
+			return scheme + "//" + extractHost(urlForDomain) + path;
+		} else if (path.startsWith("../")) {
+			int slashIndex = urlForDomain.lastIndexOf("/");
+			if (slashIndex > 0 && slashIndex + 1 < urlForDomain.length())
+				urlForDomain = urlForDomain.substring(0, slashIndex + 1);
+
+			return urlForDomain + path;
+		}
+		return path;
+	}
+
+	/**
+	 * Returns the host part of a url; shorthand for the non-aggressive
+	 * variant of {@code extractDomain}.
+	 * 
+	 * @param url
+	 *            the url to inspect
+	 * @return the host as computed by {@code extractDomain(url, false)}
+	 */
+	public static String extractHost(String url) {
+		final boolean aggressive = false;
+		return extractDomain(url, aggressive);
+	}
+
+	/**
+	 * Strips a leading "http://" or "https://" and everything from the first
+	 * slash on. In aggressive mode a leading "www." and then a leading "m."
+	 * are removed as well.
+	 * 
+	 * @param url
+	 *            the url to inspect, must not be null
+	 * @param aggressive
+	 *            whether to also remove the "www." and "m." prefixes
+	 * @return the remaining domain part
+	 */
+	public static String extractDomain(String url, boolean aggressive) {
+		String rest = url;
+		for (String scheme : new String[] { "http://", "https://" }) {
+			if (rest.startsWith(scheme)) {
+				rest = rest.substring(scheme.length());
+				break;
+			}
+		}
+
+		if (aggressive) {
+			// order matters: "www." first, then the mobile prefix "m."
+			for (String prefix : new String[] { "www.", "m." }) {
+				if (rest.startsWith(prefix))
+					rest = rest.substring(prefix.length());
+			}
+		}
+
+		int slash = rest.indexOf('/');
+		return slash > 0 ? rest.substring(0, slash) : rest;
+	}
+
+	/**
+	 * Checks whether the url points to one of the known video sites. The
+	 * domain is extracted aggressively (without "www." / "m.") first.
+	 * 
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the domain starts with a known video host
+	 */
+	public static boolean isVideoLink(String url) {
+		String domain = extractDomain(url, true);
+		String[] videoHosts = { "youtube.com", "video.yahoo.com", "vimeo.com", "blip.tv" };
+		for (String host : videoHosts) {
+			if (domain.startsWith(host))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known video file
+	 *         extension (case-sensitive)
+	 */
+	public static boolean isVideo(String url) {
+		String[] extensions = { ".mpeg", ".mpg", ".avi", ".mov", ".mpg4", ".mp4", ".flv", ".wmv" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known audio file
+	 *         extension (case-sensitive)
+	 */
+	public static boolean isAudio(String url) {
+		String[] extensions = { ".mp3", ".ogg", ".m3u", ".wav" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known document file
+	 *         extension (case-sensitive)
+	 */
+	public static boolean isDoc(String url) {
+		String[] extensions = { ".pdf", ".ppt", ".doc", ".swf", ".rtf", ".xls" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known archive or package
+	 *         file extension (case-sensitive)
+	 */
+	public static boolean isPackage(String url) {
+		String[] extensions = { ".gz", ".tgz", ".zip", ".rar", ".deb", ".rpm", ".7z" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known executable file
+	 *         extension (case-sensitive)
+	 */
+	public static boolean isApp(String url) {
+		String[] extensions = { ".exe", ".bin", ".bat", ".dmg" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * @param url
+	 *            the url to test
+	 * @return {@code true} if the url ends with a known image file
+	 *         extension (case-sensitive)
+	 */
+	public static boolean isImage(String url) {
+		String[] extensions = { ".png", ".jpeg", ".gif", ".jpg", ".bmp", ".ico", ".eps" };
+		for (String extension : extensions) {
+			if (url.endsWith(extension))
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * Installs a JVM-wide default cookie handler that accepts all cookies.
+	 * 
+	 * @see <a href=
+	 *      "http://blogs.sun.com/CoreJavaTechTips/entry/cookie_handling_in_java_se">
+	 *      Cookie handling in Java SE</a>
+	 */
+	public static void enableCookieMgmt() {
+		// a null store makes CookieManager create its default store
+		CookieManager acceptAll = new CookieManager(null, CookiePolicy.ACCEPT_ALL);
+		CookieHandler.setDefault(acceptAll);
+	}
+
+	/**
+	 * Sets the {@code http.agent} system property to the empty string.
+	 * 
+	 * @see <a href=
+	 *      "http://stackoverflow.com/questions/2529682/setting-user-agent-of-a-java-urlconnection">
+	 *      Setting the user agent of a Java URLConnection</a>
+	 */
+	public static void enableUserAgentOverwrite() {
+		final String agentProperty = "http.agent";
+		final String emptyAgent = "";
+		System.setProperty(agentProperty, emptyAgent);
+	}
+
+	/**
+	 * Extracts the target of a Google redirect link of the form
+	 * {@code http(s)://www.google.com/url?...&q=<encoded target>&...}.
+	 * <p>
+	 * The query string is split into parameters BEFORE decoding. Decoding
+	 * first would turn an encoded '&amp;' ({@code %26}) inside the target
+	 * into a parameter separator and truncate the result.
+	 * 
+	 * @param url
+	 *            the possible redirect url, must not be null
+	 * @return the decoded value of the {@code q} parameter, or {@code null}
+	 *         if the url is not a Google redirect or has no such parameter
+	 */
+	public static String getUrlFromUglyGoogleRedirect(String url) {
+		String query = null;
+		String[] prefixes = { "http://www.google.com/url?", "https://www.google.com/url?" };
+		for (String prefix : prefixes) {
+			if (url.startsWith(prefix)) {
+				query = url.substring(prefix.length());
+				break;
+			}
+		}
+		if (query == null)
+			return null;
+
+		for (String param : query.split("&")) {
+			if (param.startsWith("q="))
+				return urlDecode(param.substring("q=".length()));
+		}
+		return null;
+	}
+
+	/**
+	 * Extracts the target of a Facebook redirect link, i.e. the url-decoded
+	 * remainder after {@code http://www.facebook.com/l.php?u=}.
+	 * 
+	 * @param url
+	 *            the possible redirect url, must not be null
+	 * @return the decoded remainder, or {@code null} if the url does not
+	 *         start with the redirect prefix
+	 */
+	public static String getUrlFromUglyFacebookRedirect(String url) {
+		final String redirectPrefix = "http://www.facebook.com/l.php?u=";
+		if (!url.startsWith(redirectPrefix))
+			return null;
+
+		String encodedTarget = url.substring(redirectPrefix.length());
+		return urlDecode(encodedTarget);
+	}
+
+	/**
+	 * Url-encodes the given text using the {@code UTF8} charset name.
+	 * 
+	 * @param str
+	 *            the text to encode
+	 * @return the encoded text, or {@code str} unchanged if the encoding is
+	 *         not supported
+	 */
+	public static String urlEncode(String str) {
+		String encoded;
+		try {
+			encoded = URLEncoder.encode(str, UTF8);
+		} catch (UnsupportedEncodingException ex) {
+			encoded = str;
+		}
+		return encoded;
+	}
+
+	/**
+	 * Url-decodes the given text using the {@code UTF8} charset name.
+	 * <p>
+	 * Best effort: when the text cannot be decoded it is returned as it was
+	 * passed in. This covers an unsupported encoding and also malformed
+	 * '%' escapes (e.g. "100%" or "%zz"), for which
+	 * {@code URLDecoder.decode} throws an IllegalArgumentException.
+	 * 
+	 * @param str
+	 *            the text to decode
+	 * @return the decoded text, or {@code str} unchanged if decoding fails
+	 */
+	public static String urlDecode(String str) {
+		try {
+			return URLDecoder.decode(str, UTF8);
+		} catch (UnsupportedEncodingException ex) {
+			return str;
+		} catch (IllegalArgumentException ex) {
+			// malformed escape sequence in untrusted input: keep the raw text
+			return str;
+		}
+	}
+
+	/**
+	 * Removes the first "#!" from the url. Popular sites (facebook,
+	 * twitter, gizmodo, ...) use the hashbang to mark the following
+	 * characters as significant. Ugly but true.
+	 * 
+	 * @param url
+	 *            the url to clean, must not be null
+	 * @return the url without its first hashbang
+	 */
+	public static String removeHashbang(String url) {
+		final String hashbang = "#!";
+		int position = url.indexOf(hashbang);
+		if (position < 0)
+			return url;
+
+		return url.substring(0, position) + url.substring(position + hashbang.length());
+	}
+
+	/**
+	 * Renders the element tree below {@code root} as indented text,
+	 * starting without indentation.
+	 * 
+	 * @param root
+	 *            the element to print
+	 * @return the textual tree
+	 */
+	public static String printNode(Element root) {
+		final int noIndentation = 0;
+		return printNode(root, noIndentation);
+	}
+
+	/**
+	 * Renders the element tree below {@code root} as text: one
+	 * "tagName:ownText" line per element, each nesting level indented by
+	 * one more space, and an extra newline after every child subtree.
+	 * 
+	 * @param root
+	 *            the element to print
+	 * @param indentation
+	 *            number of spaces to put in front of the root line
+	 * @return the textual tree
+	 */
+	public static String printNode(Element root, int indentation) {
+		StringBuilder out = new StringBuilder();
+		int remaining = indentation;
+		while (remaining-- > 0)
+			out.append(' ');
+
+		out.append(root.tagName()).append(":").append(root.ownText()).append("\n");
+
+		for (Element child : root.children())
+			out.append(printNode(child, indentation + 1)).append("\n");
+
+		return out.toString();
+	}
+
+	/**
+	 * Tries to read a publication date from the path segments of a url such
+	 * as {@code http://host/2010/02/12/title}.
+	 * <p>
+	 * The url is split at '/'. A 4-character segment that parses to a number
+	 * between 1970 and 3000 is taken as the year, a 2-character segment
+	 * directly behind the year as the month (1-12) and a 2-character segment
+	 * directly behind the month as the day (1-31).
+	 * 
+	 * @param url
+	 *            the url to inspect, must not be null
+	 * @return "yyyy", "yyyy/MM" or "yyyy/MM/dd" depending on how much was
+	 *         found, or {@code null} if no year was found
+	 */
+	public static String estimateDate(String url) {
+		// drop the scheme so that it does not produce path segments
+		int index = url.indexOf("://");
+		if (index > 0)
+			url = url.substring(index + 3);
+
+		// -1 means "not found"; the *Counter variables remember the segment
+		// index at which year and month were found
+		int year = -1;
+		int yearCounter = -1;
+		int month = -1;
+		int monthCounter = -1;
+		int day = -1;
+		String strs[] = url.split("/");
+		for (int counter = 0; counter < strs.length; counter++) {
+			String str = strs[counter];
+			if (str.length() == 4) {
+				// year candidate
+				try {
+					year = Integer.parseInt(str);
+				} catch (Exception ex) {
+					// not a number: year keeps its previous value
+					continue;
+				}
+				// NOTE(review): an out-of-range 4-character number resets year
+				// to -1 and thereby discards a valid year found earlier
+				if (year < 1970 || year > 3000) {
+					year = -1;
+					continue;
+				}
+				yearCounter = counter;
+			} else if (str.length() == 2) {
+				// NOTE(review): yearCounter and monthCounter start at -1, so a
+				// 2-character segment at index 0 also satisfies the
+				// "directly behind" checks below
+				if (monthCounter < 0 && counter == yearCounter + 1) {
+					// month candidate: directly behind the year
+					try {
+						month = Integer.parseInt(str);
+					} catch (Exception ex) {
+						continue;
+					}
+					if (month < 1 || month > 12) {
+						month = -1;
+						continue;
+					}
+					monthCounter = counter;
+				} else if (counter == monthCounter + 1) {
+					// day candidate: directly behind the month
+					try {
+						day = Integer.parseInt(str);
+					} catch (Exception ex) {
+						// not a number: day keeps its value and is range-checked below
+					}
+					if (day < 1 || day > 31) {
+						day = -1;
+						continue;
+					}
+					// a complete date was found, stop scanning
+					break;
+				}
+			}
+		}
+
+		if (year < 0)
+			return null;
+
+		// assemble yyyy[/MM[/dd]] with zero-padded month and day
+		StringBuilder str = new StringBuilder();
+		str.append(year);
+		if (month < 1)
+			return str.toString();
+
+		str.append('/');
+		if (month < 10)
+			str.append('0');
+		str.append(month);
+		if (day < 1)
+			return str.toString();
+
+		str.append('/');
+		if (day < 10)
+			str.append('0');
+		str.append(day);
+		return str.toString();
+	}
+
+	/**
+	 * Pads a partial date to the form year/month/day by appending "/01" for
+	 * each missing part.
+	 * 
+	 * @param dateStr
+	 *            a date with up to two '/' separators, may be null
+	 * @return the padded date, or {@code null} for null input
+	 */
+	public static String completeDate(String dateStr) {
+		if (dateStr == null)
+			return null;
+
+		int firstSlash = dateStr.indexOf('/');
+		if (firstSlash <= 0)
+			return dateStr + "/01/01";
+
+		int secondSlash = dateStr.indexOf('/', firstSlash + 1);
+		return secondSlash > 0 ? dateStr : dateStr + "/01";
+	}
+
+	/**
+	 * Creates a new formatter for the pattern "yyyy/MM/dd" in the default
+	 * locale. Keep in mind: SimpleDateFormat is not thread safe! Call
+	 * completeDate before applying this formatter.
+	 * 
+	 * @return a fresh formatter instance
+	 */
+	public static SimpleDateFormat createDateFormatter() {
+		Locale userLocale = Locale.getDefault();
+		return new SimpleDateFormat("yyyy/MM/dd", userLocale);
+	}
+
+	/**
+	 * Replaces the JVM-wide default SSL context with one whose only trust
+	 * manager is {@code DefaultTrustManager}. Failures are printed and
+	 * otherwise ignored.
+	 * <p>
+	 * SECURITY NOTE(review): that trust manager performs no certificate
+	 * checks, so after this call TLS connections using the default context
+	 * are no longer protected against man-in-the-middle attacks. Call only
+	 * deliberately.
+	 * <p>
+	 * Written with the help of
+	 * http://stackoverflow.com/questions/1828775/httpclient-and-ssl
+	 */
+	public static void enableAnySSL() {
+		try {
+			KeyManager[] noKeyManagers = new KeyManager[0];
+			TrustManager[] trustEverything = new TrustManager[] { new DefaultTrustManager() };
+			SSLContext permissive = SSLContext.getInstance("TLS");
+			permissive.init(noKeyManagers, trustEverything, new SecureRandom());
+			SSLContext.setDefault(permissive);
+		} catch (Exception ex) {
+			ex.printStackTrace();
+		}
+	}
+
+	/**
+	 * Trust manager that accepts every certificate chain: both check
+	 * methods return without inspecting their arguments.
+	 * <p>
+	 * SECURITY NOTE(review): this disables certificate validation wherever
+	 * it is installed; it is only used by {@code enableAnySSL}.
+	 */
+	private static class DefaultTrustManager implements X509TrustManager {
+
+		/** Accepts any client certificate chain without validation. */
+		@Override
+		public void checkClientTrusted(X509Certificate[] arg0, String arg1)
+				throws CertificateException {
+		}
+
+		/** Accepts any server certificate chain without validation. */
+		@Override
+		public void checkServerTrusted(X509Certificate[] arg0, String arg1)
+				throws CertificateException {
+		}
+
+		/**
+		 * @return an empty array; the X509TrustManager contract requires a
+		 *         non-null result and some TLS implementations fail on null
+		 */
+		@Override
+		public X509Certificate[] getAcceptedIssuers() {
+			return new X509Certificate[0];
+		}
+	}
+
+	/**
+	 * Counts the characters of {@code str} for which
+	 * {@code Character.isLetter} is true.
+	 * 
+	 * @param str
+	 *            the text to inspect, must not be null
+	 * @return the number of letters
+	 */
+	public static int countLetters(String str) {
+		int letters = 0;
+		for (char c : str.toCharArray()) {
+			if (Character.isLetter(c))
+				letters++;
+		}
+		return letters;
+	}
+}
--- a/src/acr/browser/lightning/ReadingActivity.java
+++ b/src/acr/browser/lightning/ReadingActivity.java
@ -0,0 +1,153 @@
+package acr.browser.lightning;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import acr.browser.lightning.Reading.HtmlFetcher;
+import acr.browser.lightning.Reading.JResult;
+import android.animation.ObjectAnimator;
+import android.app.ProgressDialog;
+import android.content.Context;
+import android.content.Intent;
+import android.os.AsyncTask;
+import android.os.Bundle;
+import android.support.v7.app.ActionBarActivity;
+import android.support.v7.widget.Toolbar;
+import android.view.MenuItem;
+import android.view.View;
+import android.widget.TextView;
+
+public class ReadingActivity extends ActionBarActivity {
+
+	private TextView mTitle;
+	private TextView mBody;
+
+	/**
+	 * Sets up the toolbar and the (initially invisible) title and body
+	 * views, then starts loading the page named in the launching intent.
+	 * If no load could be started, the failure text is shown right away.
+	 */
+	@Override
+	protected void onCreate(Bundle savedInstanceState) {
+		super.onCreate(savedInstanceState);
+		setContentView(R.layout.reading_view);
+
+		setSupportActionBar((Toolbar) findViewById(R.id.toolbar));
+		getSupportActionBar().setDisplayHomeAsUpEnabled(true);
+
+		// placeholder title, hidden until setText fades it in
+		mTitle = (TextView) findViewById(R.id.textViewTitle);
+		mTitle.setText(getString(R.string.untitled));
+		mTitle.setVisibility(View.INVISIBLE);
+
+		// placeholder body, hidden until setText fades it in
+		mBody = (TextView) findViewById(R.id.textViewBody);
+		mBody.setText(getString(R.string.loading));
+		mBody.setVisibility(View.INVISIBLE);
+
+		boolean loadStarted = loadPage(getIntent());
+		if (!loadStarted) {
+			setText(getString(R.string.untitled), getString(R.string.loading_failed));
+		}
+	}
+
+	/**
+	 * Starts loading the page whose url is stored in the intent under
+	 * {@code Constants.LOAD_READING_URL} and shows the url's domain name as
+	 * the action bar title.
+	 * 
+	 * @param intent
+	 *            the intent carrying the url, may be null
+	 * @return {@code true} if a load was started, {@code false} if the
+	 *         intent or the url is missing
+	 */
+	protected boolean loadPage(Intent intent) {
+		String url = (intent == null) ? null : intent.getStringExtra(Constants.LOAD_READING_URL);
+		if (url == null) {
+			return false;
+		}
+
+		getSupportActionBar().setTitle(Utils.getDomainName(url));
+		PageLoader loader = new PageLoader(this);
+		loader.execute(url);
+		return true;
+	}
+
+	/**
+	 * Background task that fetches a page with {@link HtmlFetcher}, extracts
+	 * its title and text and hands both to {@code setText}. A
+	 * non-cancelable progress dialog is shown while the task runs.
+	 */
+	private class PageLoader extends AsyncTask<String, Void, Void> {
+
+		private final Context mContext;
+		private ProgressDialog mProgressDialog;
+		private String mTitleText;
+		private List<String> mBodyText;
+
+		public PageLoader(Context context) {
+			mContext = context;
+		}
+
+		/** Shows the indeterminate progress dialog before loading starts. */
+		@Override
+		protected void onPreExecute() {
+			super.onPreExecute();
+			mProgressDialog = new ProgressDialog(mContext);
+			mProgressDialog.setProgressStyle(ProgressDialog.STYLE_SPINNER);
+			mProgressDialog.setCancelable(false);
+			mProgressDialog.setIndeterminate(true);
+			mProgressDialog.setMessage(mContext.getString(R.string.loading));
+			mProgressDialog.show();
+		}
+
+		/**
+		 * Fetches and extracts the page at {@code params[0]}. Any failure
+		 * leaves an empty title and body, which onPostExecute reports as
+		 * "loading failed".
+		 */
+		@Override
+		protected Void doInBackground(String... params) {
+
+			HtmlFetcher fetcher = new HtmlFetcher();
+			try {
+				JResult result = fetcher.fetchAndExtract(params[0], 5000, true);
+				mTitleText = result.getTitle();
+				mBodyText = result.getTextList();
+			} catch (Exception e) {
+				mTitleText = "";
+				mBodyText = new ArrayList<>();
+				e.printStackTrace();
+			} catch (OutOfMemoryError e) {
+				System.gc();
+				mTitleText = "";
+				mBodyText = new ArrayList<>();
+				e.printStackTrace();
+			}
+			return null;
+		}
+
+		/** Hides the dialog and shows the loaded text or the failure text. */
+		@Override
+		protected void onPostExecute(Void result) {
+			// The activity can be closed (Up button) while the task is still
+			// running; dismissing a dialog whose window is already detached
+			// throws IllegalArgumentException.
+			if (mProgressDialog != null && mProgressDialog.isShowing()) {
+				try {
+					mProgressDialog.dismiss();
+				} catch (IllegalArgumentException ignored) {
+					// window already gone, nothing left to dismiss
+				}
+			}
+
+			// nothing to update once the activity is going away
+			if (ReadingActivity.this.isFinishing()) {
+				super.onPostExecute(result);
+				return;
+			}
+
+			boolean failed = mTitleText == null || mTitleText.isEmpty() || mBodyText == null
+					|| mBodyText.isEmpty();
+			if (failed) {
+				setText(getString(R.string.untitled), getString(R.string.loading_failed));
+			} else {
+				StringBuilder builder = new StringBuilder();
+				for (String text : mBodyText) {
+					builder.append(text).append("\n\n");
+				}
+				setText(mTitleText, builder.toString());
+			}
+			super.onPostExecute(result);
+		}
+
+	}
+
+	/**
+	 * Shows the given title and body. A view that is still invisible is
+	 * faded in over 300 ms; a view that is already visible just gets the
+	 * new text.
+	 */
+	private void setText(String title, String body) {
+		revealWithText(mTitle, title);
+		revealWithText(mBody, body);
+	}
+
+	/** Sets the text of one view, fading the view in if it is invisible. */
+	private void revealWithText(TextView view, String text) {
+		if (view.getVisibility() != View.INVISIBLE) {
+			view.setText(text);
+			return;
+		}
+
+		view.setAlpha(0.0f);
+		view.setVisibility(View.VISIBLE);
+		view.setText(text);
+		ObjectAnimator fadeIn = ObjectAnimator.ofFloat(view, "alpha", 1.0f);
+		fadeIn.setDuration(300);
+		fadeIn.start();
+	}
+
+	/**
+	 * Closes the activity for any selected options item, then lets the
+	 * superclass handle the item.
+	 */
+	@Override
+	public boolean onOptionsItemSelected(MenuItem item) {
+		finish();
+		boolean handledBySuper = super.onOptionsItemSelected(item);
+		return handledBySuper;
+	}
+
+}