程序功能与用法:输入网页地址链接(存放在数组中),如 http://news.163.com (支持同时输入多个链接);
原理、类包:使用开源类包:HttpClient, NekoHTML, xerces2 ,ExecutorService (java5线程池支持)
所需类包(.jar):
下载: http://quanta.11tea.com/share/13956
执行代码:
package leung.utils;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.concurrent.*;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.NodeList;
/**
 * Downloads the JPEG images referenced by the {@code <img>} tags of one or more web pages.
 *
 * <p>Each page is parsed with NekoHTML, the {@code src} of every image is resolved against the
 * page address, and every JPEG is fetched concurrently through a fixed-size thread pool. The
 * images of one page are stored in their own folder, named after the current time in
 * milliseconds, below {@link #PHOTO_ROOT}.
 */
public class GetPhotoByTags {
    /** Capacity of the download thread pool. */
    private static final int POOL_SIZE = 30;

    /** Directory below which one time-stamped folder per page is created. */
    private static final String PHOTO_ROOT = "F:/java_test/photos";

    /** Upper bound on how long {@link #test()} waits for queued downloads to finish. */
    private static final long DOWNLOAD_TIMEOUT_MINUTES = 10;

    /** HTTP status code of a successful response. */
    private static final int HTTP_OK = 200;

    /** HTTP client shared by all download tasks. */
    private HttpClient client = null;

    /** Thread pool that runs the download tasks. */
    private ExecutorService exe = null;

    /**
     * Downloads all JPEG images of the configured pages, waits for the downloads to finish and
     * then opens the photo directory.
     *
     * @throws Exception if a page cannot be parsed or the wait for the downloads is interrupted
     */
    public void test() throws Exception {
        client = new HttpClient(new MultiThreadedHttpConnectionManager());
        exe = Executors.newFixedThreadPool(POOL_SIZE);
        client.getHttpConnectionManager().getParams().setConnectionTimeout(30000);
        DOMParser parser = new DOMParser();
        // links web pages like "http://news.163.com/"
        String[] argv = {
            "http://news.163.com/"
        };
        try {
            for (int i = 0; i < argv.length; i++) {
                parser.parse(argv[i]);
                ArrayList<String> imageUrls =
                        collectImageUrls(parser.getDocument().getElementsByTagName("img"), argv[i]);
                File folder = new File(PHOTO_ROOT + "/" + (new Date()).getTime());
                // mkdirs() also creates missing parent directories; mkdir() would silently fail.
                if (!folder.mkdirs() && !folder.isDirectory()) {
                    System.err.println("Cannot create folder " + folder);
                    continue;
                }
                for (int j = 0; j < imageUrls.size(); j++) {
                    String url = imageUrls.get(j);
                    if (isJpeg(url)) {
                        System.out.println(url);
                        loadPage(url, j, folder.getPath() + "/");
                    }
                }
            }
        } finally {
            // Always stop accepting work so the pool threads do not keep the JVM alive.
            exe.shutdown();
        }
        // shutdown() does not block; wait so that "done" is only reported after the downloads.
        if (!exe.awaitTermination(DOWNLOAD_TIMEOUT_MINUTES, TimeUnit.MINUTES)) {
            System.err.println("Timed out waiting for downloads to finish");
        }
        System.out.println("done");
        openFileSystemDir(PHOTO_ROOT);
    }

    /**
     * Extracts the absolute image addresses from a list of {@code <img>} elements.
     *
     * @param images  the {@code <img>} elements of the page
     * @param pageUrl address of the page, used to resolve relative {@code src} values
     * @return the resolved addresses; images without a usable {@code src} are skipped
     */
    private static ArrayList<String> collectImageUrls(NodeList images, String pageUrl) {
        ArrayList<String> urls = new ArrayList<String>();
        for (int j = 0; j < images.getLength(); j++) {
            org.w3c.dom.Node srcAttr = images.item(j).getAttributes().getNamedItem("src");
            if (srcAttr == null) {
                // An <img> without src would otherwise cause a NullPointerException.
                continue;
            }
            String src = srcAttr.getNodeValue();
            if (src == null || src.trim().length() == 0) {
                continue;
            }
            try {
                // Turns relative and protocol-relative references into absolute addresses.
                urls.add(java.net.URI.create(pageUrl).resolve(src.trim()).toString());
            } catch (IllegalArgumentException ex) {
                System.err.println("Skipping malformed image URL: " + src);
            }
        }
        return urls;
    }

    /**
     * Tells whether an address points to a JPEG file, judged by its file extension.
     *
     * @param url the image address; a query string or fragment is ignored
     * @return {@code true} if the path ends in {@code .jpg} or {@code .jpeg} (case-insensitive)
     */
    private static boolean isJpeg(String url) {
        String path = url;
        int cut = path.indexOf('?');
        if (cut >= 0) {
            path = path.substring(0, cut);
        }
        cut = path.indexOf('#');
        if (cut >= 0) {
            path = path.substring(0, cut);
        }
        String lower = path.toLowerCase();
        return lower.endsWith(".jpg") || lower.endsWith(".jpeg");
    }

    /**
     * Queues a task that downloads one image and stores it as {@code pp<i>.jpg}.
     *
     * @param url    absolute address of the image
     * @param i      number used in the name of the stored file
     * @param folder target folder, including the trailing separator
     */
    private void loadPage(final String url, final int i, final String folder) {
        // A plain Runnable is enough; the pool supplies the thread that runs it.
        exe.execute(
            new Runnable() {
                public void run() {
                    GetMethod get;
                    try {
                        get = new GetMethod(url);
                    } catch (RuntimeException ex) {
                        // The constructor rejects addresses it cannot parse.
                        ex.printStackTrace();
                        return;
                    }
                    get.setFollowRedirects(true);
                    try {
                        int status = client.executeMethod(get);
                        if (status != HTTP_OK) {
                            // Do not store an error page under a .jpg name.
                            System.err.println("Skipping " + url + " (HTTP " + status + ")");
                            return;
                        }
                        byte[] body = get.getResponseBody();
                        if (body == null) {
                            return;
                        }
                        FileOutputStream output =
                                new FileOutputStream(new File(folder + "pp" + i + ".jpg"));
                        try {
                            output.write(body);
                        } finally {
                            // Close the file even when the write fails.
                            output.close();
                        }
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    } finally {
                        get.releaseConnection();
                    }
                }
            }
        );
    }

    /**
     * Opens a directory in the Windows file explorer via {@code cmd.exe /c start}.
     *
     * @param dir path of the directory to open
     */
    private void openFileSystemDir(String dir) {
        String openDirCmd = "cmd.exe /c start " + dir;
        try {
            Runtime.getRuntime().exec(openDirCmd);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Program entry point; runs {@link #test()} and prints any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new GetPhotoByTags().test();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
注意:程序对网页中的图片进行了筛选,只保存 jpg/jpeg 图片。程序运行时会为每个网页生成一个文件夹来保存图片,文件夹以当前时间(毫秒时间戳)命名。