python抓取图片示例
2018-07-20 来源:open-open
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Download the images of every thread listed on a forum board.

Layout produced on disk (relative to the directory the script is started in):

    <homedir>/<page number>/<thread title without '/'>/<n><suffix>

where ``n`` counts the images of a thread from 0 and ``suffix`` is the text
from the last '.' of the image URL on.  A thread whose directory already
exists is skipped.

This is Python 2 code: it relies on ``urllib2``, ``cookielib`` and the
BeautifulSoup 3 package.
"""
import re
import os
import urllib
import urllib2
import cookielib
import shutil

from BeautifulSoup import BeautifulSoup


# ---- utils ----
def normalize_url(url):
    """Return *url* with "http://" prepended unless it already starts with it."""
    return url if url.startswith("http://") else "http://" + url


def safeDir(dir):
    """Return *dir* with every '/' removed, so it is a single path component."""
    return dir.replace('/', '')


def _image_suffix(img_url):
    """Return the text from the last '.' of *img_url* on, or "" if unusable.

    "" is returned when the URL has no '.' at all, or when a '/' follows the
    last '.' (the "suffix" would then contain a path separator and could not
    be used in a file name).
    """
    dot = img_url.rfind('.')
    if dot == -1:
        return ""
    suffix = img_url[dot:]
    return "" if '/' in suffix else suffix


# ---- variable ----
homepagePrefix = "http://60dxw.comww1.baisex.me/forum-47-"
homepageSuffix = ".html"
threadPrefix = "http://60dxw.comww1.baisex.me/"
homedir = "baixingge"

# Compiled once instead of once per page / per thread.
urlPattern = re.compile(r'href="([^"]*)"')
titlePattern = re.compile(r'>([^<]*)</a>')
imgPattern = re.compile(r'img src="([^"]*)" onload')

# ---- login ----
# NOTE(review): this cookie-aware opener is built but never used below; all
# requests go through urllib.urlopen / urllib.urlretrieve.  Confirm whether
# the crawl was meant to use it.
cookie = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie)


def _download_thread(thread_url, thread_dir):
    """Fetch *thread_url* and save each matched image into *thread_dir*."""
    page_body = urllib.urlopen(thread_url).read()
    page_soup = BeautifulSoup(page_body)
    for index, img in enumerate(imgPattern.findall(str(page_soup))):
        print("-------->{0}".format(img))
        img_name = "{0}{1}".format(index, _image_suffix(img))
        try:
            urllib.urlretrieve(img, os.path.join(thread_dir, img_name))
        except IOError as err:
            # One unreachable image must not abort the rest of the crawl.
            print("-------->failed: {0}".format(err))


def _crawl_page(page):
    """Download every not-yet-present thread listed on board page *page*."""
    page_dir = str(page)
    if not os.path.exists(page_dir):
        os.mkdir(page_dir)

    pageUrl = '{0}{1}{2}'.format(homepagePrefix, page, homepageSuffix)
    print(pageUrl)

    # ---- download ----
    html_body = urllib.urlopen(pageUrl).read()
    soup = BeautifulSoup(html_body)

    # ---- extract ----
    for urlRaw in soup.findAll('th', attrs={'class': ['new', 'common']}):
        cell = str(urlRaw)
        h = urlPattern.search(cell)
        t = titlePattern.search(cell)
        if h is None or t is None:
            # Cell without a link or without link text: nothing to download.
            continue

        safe_title = safeDir(t.group(1))
        if not safe_title:
            # An empty name cannot be used as a directory.
            continue

        # Test the same (sanitised) name that is created below; otherwise a
        # title containing '/' is never recognised as already downloaded.
        thread_dir = os.path.join(page_dir, safe_title)
        if os.path.exists(thread_dir):
            continue
        os.mkdir(thread_dir)

        page_url = threadPrefix + h.group(1)
        print("---->{0}".format(page_url))
        print("---->{0}".format(safe_title))
        _download_thread(page_url, thread_dir)


# ---- file ----
if not os.path.exists(homedir):
    os.mkdir(homedir)
os.chdir(homedir)

# ---- crawl ----
for page in range(1, 25):
    _crawl_page(page)
标签:
版权申明:本站文章部分自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点!
本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。
上一篇:截取中文字符串PHP代码
下一篇:图片缩放水印PHP类
最新资讯
- SEO的用户互动:论在线评论内容的重
- 3个被忽视策略帮你提升电商网站流量
- 如何找到并改进那些效果不佳的入口
- 详细说说目标关键词
- 说说那些能够在三个月做到10万IP的
- 流量为王是SEO思维的毒药
- 透析网站跳出率:页面质量很重要!
- 百度零位置排名,如何利用其快速获
- 干货:4大标题优化技巧原理引爆自然
- 网站百度快照时间不更新或倒退怎么
热门推荐