// Java: notes on scraping a web page's title and body text (java 网页页面抓取标题和正文心得)
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import ;
import ;
import ;
import ;
import ;
public class WebContent
{
/**
* 读取一个网页全部内容
*/
public String getOneHtml(final String htmlurl) throws IOException
{
URL url;
String temp;
final StringBuffer sb = new StringBuffer();
try
{
url = new URL(htmlurl);
final BufferedReader in = new BufferedReader(new InputStreamReader((), "utf-8"));// 读取网页全部内容
while ((temp = ()) != null)
{
(temp);
}
();
}
catch (final MalformedURLException me)
{
("你输入的URL格式有问题!请仔细输入");
();
throw me;
}
catch (final IOException e)
{
();
throw e;
}
return ();
}
/**
 * Gets the title of a web page.
 *
 * @param s the text to search, presumably the page's HTML source
 * @return the web page title
 */
public String getTitle(final String s)
{
String regex;
String title = "";
final List<String> list = new ArrayList<String>();
regex = "<title>.*?</title>";
final Pattern pa = pile(regex, );
final Matcher ma = (s);
while (())
{
(());
}
for (int i = 0; i < (); i++)
{
title = title + (i);
}
retu
java 网页页面抓取标题和正文心得 来自淘豆网m.daumloan.com转载请标明出处.