import urllib.request
import re

# Base URL of the GOST document on gostinfo.ru, given WITHOUT the page number:
# the page number is appended to this string for every request.
url_gost = 'http://www.gostinfo.ru/PRI/Page/GetPage?MaterialID=253306&lpage=5&page='
# Value sent in the User-Agent header of every HTTP request.
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/12.04 Chromium/18.0.1025.168 Chrome/18.0.1025.168 Safari/535.19'
# Folder the downloaded .jpg files are saved to.
# Raw string: '\T' and '\_' are not valid escape sequences in a normal literal.
sImagesFolder = r'D:\TEMP\_GOST_DOWNLOADER'

# Root of the site; the relative image path cut out of the page HTML is
# appended to it.
SITE_ROOT = 'http://www.gostinfo.ru'
# Matches the "Страница: N из M" counter; the named group captures M.
PAGE_COUNT_RE = re.compile(r'Страница: \d+ из (?P<pages_num>\d+)')
# Marker searched for in the page HTML to locate the image link.
CSS_MARKER = 'GetPageCSS'
# Endpoint name substituted for the marker to obtain the image itself.
CONTENT_ENDPOINT = 'GetPageContent'
# The link is cut out with a fixed window around the marker: 10 characters
# before it (presumably the '/PRI/Page/' path prefix -- confirm against the
# live HTML) up to 61 characters after the marker's first character.
LINK_PREFIX_LEN = 10
LINK_WINDOW_LEN = 61
IMAGE_EXT = 'jpg'


def parse_page_count(html):
    """Return the total number of pages announced in *html*.

    Looks for the "Страница: N из M" counter and returns M as an int,
    or None when the counter is not present in the text.
    """
    found = PAGE_COUNT_RE.search(html)
    if found is None:
        return None
    return int(found.group('pages_num'))


def extract_image_url(html):
    """Return the absolute URL of the page image referenced in *html*.

    The link is taken from a fixed-size window around the first occurrence
    of the 'GetPageCSS' marker, and every 'GetPageCSS' in the result is
    replaced with 'GetPageContent'. Returns None when the marker is missing
    or sits too close to the start of the text for the window to fit.
    """
    marker_at = html.find(CSS_MARKER)
    # find() returns -1 when absent; a start below the prefix length would
    # turn into a negative slice index and silently produce garbage.
    if marker_at < LINK_PREFIX_LEN:
        return None
    relative = html[marker_at - LINK_PREFIX_LEN:marker_at + LINK_WINDOW_LEN]
    return (SITE_ROOT + relative).replace(CSS_MARKER, CONTENT_ENDPOINT)


def _fetch_bytes(url):
    """Download *url* with the configured User-Agent and return the raw body."""
    request = urllib.request.Request(url, headers={'User-Agent': user_agent})
    # The context manager closes the response even if read() fails.
    with urllib.request.urlopen(request) as response:
        return response.read()


def _save_image(page_no, raw_data):
    """Write *raw_data* to the numbered .jpg file for page *page_no*."""
    image_name = '0000{0:04d}'.format(page_no)
    target = "{0}/{1}.{2}".format(sImagesFolder, image_name, IMAGE_EXT)
    try:
        with open(target, 'wb') as out:
            out.write(raw_data)
    except OSError:
        # Best effort: report the failure and carry on with the next page.
        print("couldn't write to {0}".format(target))


def main():
    """Download every page image of the configured document.

    Reads the page count from the first page, then for each page locates
    the image link, downloads the image and stores it in the images folder.
    A page that fails is reported and skipped; the run continues.
    """
    first_html = _fetch_bytes(url_gost + '1').decode("utf8")
    pages_number = parse_page_count(first_html)
    if pages_number is None:
        raise SystemExit("error: cannot find the page counter on the first page")
    print("Downloading pages from 1 to", pages_number)

    for page_no in range(1, pages_number + 1):
        if page_no == 1:
            # Already downloaded to read the page count; do not fetch twice.
            html = first_html
        else:
            try:
                html = _fetch_bytes(url_gost + str(page_no)).decode("utf8")
            except OSError:
                print("error: cannot retrieve page {0}".format(page_no))
                continue

        image_url = extract_image_url(html)
        if image_url is None:
            print("error: cannot find image link on page {0}".format(page_no))
            continue

        print("Retrieving page {0} of {1} :{2}".format(page_no, pages_number, image_url))
        try:
            raw_data = _fetch_bytes(image_url)
        except Exception:
            # Deliberately broad (the original swallowed everything here):
            # a malformed link can raise non-OSError exceptions as well.
            # Unlike a bare except, Ctrl+C still interrupts the run.
            print("error: cannot retrieve image")
            continue
        print("read {0} bytes".format(len(raw_data)))

        _save_image(page_no, raw_data)
        print()


if __name__ == '__main__':
    main()

# TODO: convert the downloaded images to .pdf