# Python 2.x
# -*- coding: utf-8 -*-
"""Download every ``.jpg`` image referenced by a web page.

The example targets a Baidu Tieba thread.  The code was written for
Python 2.x; the import fallback below lets it run on Python 3 as well.
"""
import re
import urllib
from contextlib import closing

try:
    # Python 3 moved urlopen/urlretrieve into urllib.request.
    import urllib.request as _request
except ImportError:
    # Python 2: both functions live directly on the urllib module.
    _request = urllib

PAGE_URL = 'http://tieba.baidu.com/p/4872795764'

# The "?" makes ".*" non-greedy, so each match stops at the first
# '.jpg" size' instead of running on to the last one.  Because the pattern
# has exactly one group, re.findall returns only the captured URL rather
# than the whole match.
IMG_RE = re.compile(r'src="(.*?\.jpg)" size')


def gethtml(url):
    """Return the raw body of *url* as read from the server.

    The result is a byte string (``str`` on Python 2, ``bytes`` on
    Python 3); no decoding is applied here.
    """
    # closing() releases the connection even if read() raises.
    with closing(_request.urlopen(url)) as page:
        return page.read()


def find_image_urls(html):
    """Return the ``.jpg`` URLs matched by ``IMG_RE`` in *html*, in order.

    *html* may be text or the byte string returned by :func:`gethtml`.
    An input without matches yields an empty list.
    """
    if isinstance(html, bytes) and not isinstance(html, str):
        # Python 3 only (on Python 2 ``bytes is str``): a text pattern
        # cannot search bytes.  Undecodable bytes are replaced rather than
        # raising; the image URLs are expected to be plain ASCII.
        html = html.decode('utf-8', 'replace')
    return IMG_RE.findall(html)


def getimg(html, filename_template='mac_book_Pro_%s.jpg'):
    """Download every image referenced in *html* to the working directory.

    The list of matched URLs is printed first, then each image is saved as
    ``filename_template % index`` with ``index`` counting from 0.  The
    flat-script variant of this snippet used ``'mac_book Pro %s.jpg'``;
    pass that template to reproduce its file names.

    Returns ``None``.
    """
    imglist = find_image_urls(html)
    print(imglist)
    for count, imgurl in enumerate(imglist):
        _request.urlretrieve(imgurl, filename=filename_template % count)


if __name__ == '__main__':
    # Guarded so that importing this module does not hit the network.
    getimg(gethtml(PAGE_URL))