android – get Text out of webview

Getting the plain text content from a webview is rather hard. Basically, the android classes don’t offer it, but javascript does, and Android offers a way for javascript to pass the information back to your code.

Before I go into the details, do note that if your html structure is simple, you might be better off just parsing the data manually.

That said, here is what you do:

  1. Enable javascript
  2. Add your own javascript interface class, to allow the javascript to communicate with your Android code
  3. Register your own webviewClient, overriding the onPageFinished to insert a bit of javascript
  4. In the javascript, acquire the element.innerText of the tag, and pass it to your javascript interface.

To clarify, I’ll post a working (but very rough) code example below. It displays a webview on the top, and a textview with the text-based contents on the bottom.

package test.android.webview;

import android.app.Activity;
import android.os.Bundle;
import android.webkit.WebView;
import android.webkit.WebViewClient;
import android.widget.TextView;

public class WebviewTest2Activity extends Activity {
    /** Called when the activity is first created. */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);

        WebView webView = (WebView) findViewById(R.id.webView);
        TextView contentView = (TextView) findViewById(R.id.contentView);

        /* An instance of this class will be registered as a JavaScript interface */ 
        class MyJavaScriptInterface 
        { 
            private TextView contentView;

            public MyJavaScriptInterface(TextView aContentView)
            {
                contentView = aContentView;
            }

            @SuppressWarnings("unused") 

            public void processContent(String aContent) 
            { 
                final String content = aContent;
                contentView.post(new Runnable() 
                {    
                    public void run() 
                    {          
                        contentView.setText(content);        
                    }     
                });
            } 
        } 

        webView.getSettings().setJavaScriptEnabled(true); 
        webView.addJavascriptInterface(new MyJavaScriptInterface(contentView), "INTERFACE"); 
        webView.setWebViewClient(new WebViewClient() { 
            @Override 
            public void onPageFinished(WebView view, String url) 
            { 
                view.loadUrl("javascript:window.INTERFACE.processContent(document.getElementsByTagName('body')[0].innerText);"); 
            } 
        }); 

        webView.loadUrl("http://shinyhammer.blogspot.com");
    }
}

Using the following main.xml:

<?xml version="1.0" encoding="utf-8"?>
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="fill_parent"
    android:layout_height="fill_parent"
    android:orientation="vertical" >

    <WebView
        android:id="@+id/webView"
        android:layout_width="match_parent"
        android:layout_height="fill_parent"
        android:layout_weight="0.5" />

    <TextView
        android:id="@+id/contentView"
        android:layout_width="match_parent"
        android:layout_height="fill_parent"
        android:layout_weight="0.5" />


</LinearLayout>

Leave a Comment