利用htmlunit下载网页上的文件
2018-07-20 来源:open-open
import java.io.FileOutputStream; import java.io.InputStream; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import com.gargoylesoftware.htmlunit.Page; import com.gargoylesoftware.htmlunit.WebClient; public class DownloadFile { public static void main(String[] args) throws Exception { String baseUrl = "<a href="http://hanyu.iciba.com/hanzi/1.shtml";" target="_blank">http://hanyu.iciba.com/hanzi/1.shtml";</a> String bihuaRegex = "class=\"guanggao\"[^<]*<[^<]*<param\\s*name=\"movie\"\\s*value=\"([^\"]*)"; String aSoundRegex = "class=\"js12\">ā.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)"; String eSoundRegex = "class=\"js12\">ē.*?name=\"FlashVars\"\\s*value=\"f=([^\"]*)"; WebClient client = new WebClient(); client.getOptions().setCssEnabled(false); client.getOptions().setJavaScriptEnabled(false); client.getOptions().setThrowExceptionOnFailingStatusCode(false); client.getOptions().setThrowExceptionOnScriptError(false); Page page = client.getPage(baseUrl); String source = page.getWebResponse().getContentAsString(); Matcher mBihuan = Regex(source, bihuaRegex); Matcher mA = Regex(source, aSoundRegex); Matcher mE = Regex(source, eSoundRegex); while(mBihuan.find()) { String url = "<a href="http://hanyu.iciba.com/" + mBihuan.group" target="_blank">http://hanyu.iciba.com/" + mBihuan.group</a>(1); page = client.getPage(url); saveFile(page, "d:/testDownload/bihua.swf"); } while(mA.find()) { String url = mA.group(1); page = client.getPage(url); saveFile(page, "d:/testDownload/a.mp3"); } while(mE.find()) { String url = mE.group(1); page = client.getPage(url); saveFile(page, "d:/testDownload/e.mp3"); } } public static Matcher Regex(String source, String regex) { Pattern p = Pattern.compile(regex, Pattern.DOTALL); return p.matcher(source); } public static void saveFile(Page page, String file) throws Exception { InputStream is = page.getWebResponse().getContentAsStream(); FileOutputStream output = new FileOutputStream(file); 
IOUtils.copy(is, output); output.close(); } }
标签:
版权声明:本站文章部分来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点!
本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。
上一篇:php实现文件下载!
最新资讯
热门推荐