jsoup抓取豆瓣美女

2018-07-20    来源:open-open

容器云强势上线!快速搭建集群,上万Linux镜像随意使用
import java.io.BufferedOutputStream;  
import java.io.File;  
import java.io.FileOutputStream;  
import java.io.IOException;  
import java.io.InputStream;  
import java.io.OutputStream;  
import java.net.URL;  
  
import org.jsoup.Jsoup;  
import org.jsoup.nodes.Document;  
import org.jsoup.nodes.Element;  
import org.jsoup.select.Elements;  
  
/**
 * Small demo crawler: requests a fixed number of listing pages with jsoup,
 * collects the {@code src} of every {@code <img>} element on each page and
 * saves the referenced files into a local directory.
 */
public class Demo4 {

    /** Base address of the listing pages; the page index is appended to it. */
    private static final String url = "http://www.dbmeinv.com/?p=";
    /** Directory the downloaded images are written to. */
    private static final String picPath = "d:/picTest";
    /** User-Agent header value sent with every page request. */
    private static final String USER_AGENT="Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0";

    /** Size in bytes of the buffer used when copying an image to disk. */
    private static final int COPY_BUFFER_SIZE = 8192;

    /**
     * Walks pages 0..49 and downloads every image found on them.
     * A page that fails to load is reported on stderr and skipped.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("下载完的图片位于d:/picTest");
        for (int i = 0; i < 50; i++) {
            try {
                Document doc = Jsoup.connect(url+i).userAgent(USER_AGENT).timeout(3000).data("pager_offset", i+1+"").post();
                Elements img = doc.select("img");
                for (Element ele : img) {
                    String src = ele.absUrl("src");
                    getImage(src);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        System.out.println("这些足够多了!");
        System.out.println("图片下载完成!");
    }

    /**
     * Downloads one image and stores it in {@link #picPath} under the last
     * path segment of its URL. Failures are printed and do not propagate, so
     * one bad image never stops the crawl.
     *
     * @param src absolute URL of the image; values without a usable file
     *            name (null, empty, no '/', or ending in '/') are skipped
     */
    private static void getImage(String src) {
        if (src == null) {
            return;
        }
        int indexName = src.lastIndexOf('/');
        // Without a '/' or with nothing after it there is no file name to
        // write to; previously this case crashed with an uncaught exception.
        if (indexName < 0 || indexName == src.length() - 1) {
            return;
        }
        String name = src.substring(indexName + 1);

        InputStream in = null;
        OutputStream out = null;
        try {
            URL imageUrl = new URL(src);
            in = imageUrl.openStream();

            // Create the target directory on first use.
            File dir = new File(picPath);
            if (!dir.exists()) {
                dir.mkdirs();
            }

            out = new BufferedOutputStream(new FileOutputStream(new File(dir, name)));
            // Copy in blocks instead of one byte per read() call.
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close each stream independently: either may still be null when
            // opening failed, and a failure closing one must not leak the other.
            closeQuietly(out);
            closeQuietly(in);
        }
    }

    /**
     * Closes the given resource if it is non-null, printing (not throwing)
     * any I/O error raised by the close.
     *
     * @param resource stream to close, may be null
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


标签:

版权声明:本站部分文章来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点!
本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。

上一篇:Java CountDownLatch示例代码

下一篇:jquery将元素中的html代码全部清除