Wednesday, January 18, 2012

Naruto downloader from mangareader.net

After reading Naruto up to chapter 520 (courtesy of "naruto with elisp"), I was eager to read the rest. As of today the latest chapter is 570. I found mangareader.net after a bit of googling and was busy reading my way through the chapters. But, like before, reading in the browser was not to my taste. Currently mcomix is my comic reader of choice. So, I set about writing a script to automatically download the remaining chapters from mangareader.net. I do not know if it's wrong to do so; the site does not have any terms of use :|
import re
from urllib2 import urlopen
from zipfile import ZipFile, ZIP_DEFLATED
from xml.dom.minidom import parseString
def get_info(line, alt_regex):
    """Extract page metadata from the HTML line that holds the page image.

    The line is trimmed to the span from its first ``<a href`` up to the
    first ``</a>`` and that fragment is parsed as XML.

    Args:
        line: HTML source line containing an anchor that wraps an ``<img>``.
        alt_regex: pattern (string or compiled) with two numeric groups,
            chapter and page, searched for in the image's ``alt`` text.

    Returns:
        A dict with the keys ``next`` (the anchor's href), ``img_url`` (the
        image's src), ``img_ext`` (text after the last dot of ``img_url``),
        ``chapter`` and ``page`` (both ints); ``None`` if the line could not
        be parsed, in which case the problem is printed.
    """
    dom = None
    try:
        line = line[:line.index('</a>')] + '</a>'
        line = line[line.index('<a href'):]
        dom = parseString(line)
        a = dom.getElementsByTagName('a')[0]
        img = a.getElementsByTagName('img')[0]
        img_url = img.getAttribute('src')
        m = re.search(alt_regex, img.getAttribute('alt'))
        return {
            'next': a.getAttribute('href'),
            'img_url': img_url,
            'img_ext': img_url[img_url.rindex('.') + 1:],
            'chapter': int(m.group(1)),
            'page': int(m.group(2)),
        }
    except Exception as e:
        # Deliberately broad: this is a best-effort scraper and any parse
        # problem (missing tag, malformed markup, unexpected alt text) is
        # reported the same way.  Format with %s -- concatenating a str and
        # an exception object raises TypeError and hides the real error.
        print('[ERROR] %s' % line)
        print('[ERROR] %s' % e)
        return None
    finally:
        # Release the DOM on the failure path as well as on success.
        if dom is not None:
            dom.unlink()
def get_image(url):
    """Download *url* and return the response body.

    Args:
        url: address passed to ``urlopen``.

    Returns:
        The data read from the response, or ``None`` if opening or reading
        failed (the error is printed).
    """
    try:
        f = urlopen(url)
        try:
            return f.read()
        finally:
            # Close the response even when read() fails part-way.
            f.close()
    except Exception as e:
        # Best-effort download: report and signal failure with None.
        # '%s' formatting is required; '[ERROR] ' + e raises TypeError.
        print('[ERROR] %s' % e)
        return None
def get_chapter(url_prefix, url_suffix, title, chapter=1):
    """Download pages into one ``NNN.cbz`` archive per chapter.

    Starting at ``url_prefix + url_suffix``, each page is fetched, the line
    containing ``id="img"`` is handed to ``get_info``, the image it names is
    stored in the current archive as ``PP.<ext>``, and the page's ``next``
    link is followed.  When a page reports a different chapter number the
    current archive is closed and a new one is started.  Crawling stops at
    the first page that has no ``id="img"`` line or that cannot be parsed.

    Args:
        url_prefix: scheme and host, prepended to every relative link.
        url_suffix: path of the first page to fetch.
        title: series title as it appears in the image ``alt`` text
            (``"<title> <chapter> - Page <page>"``).
        chapter: chapter number expected for the first page; names the
            first archive.
    """
    # Escape the title so characters such as '.', '+' or '(' are matched
    # literally instead of being interpreted as regex syntax.
    alt_regex = re.compile(r'%s (\d+) - Page (\d+)' % re.escape(title))
    url = '%s%s' % (url_prefix, url_suffix)
    cbz = ZipFile('%03d.cbz' % chapter, "w", ZIP_DEFLATED)
    try:
        while True:
            f = urlopen(url)
            try:
                lines = f.readlines()
            finally:
                f.close()

            img_lines = [x for x in lines if 'id="img"' in x]
            if not img_lines:
                break  # image not found so end of chapter

            info = get_info(img_lines[0], alt_regex)
            if info is None:
                break  # get_info already reported why the page is unusable

            if info['chapter'] != chapter:
                # new chapter: rotate archives, then store this same page
                # right away instead of fetching its HTML a second time
                cbz.close()
                chapter = info['chapter']
                cbz = ZipFile('%03d.cbz' % chapter, "w", ZIP_DEFLATED)

            image = get_image(info['img_url'])
            if image is None:
                # get_image already printed the cause; keep going so one
                # bad image does not abort the whole download
                print('[ERROR] skipping chapter %d page %d'
                      % (info['chapter'], info['page']))
            else:
                cbz.writestr('%02d.%s' % (info['page'], info['img_ext']), image)
            url = '%s%s' % (url_prefix, info['next'])
    finally:
        # Always finalize the archive so its central directory is written,
        # including when a network error propagates out of the loop.
        cbz.close()
if __name__ == '__main__':
    # Run the download only when executed as a script, not on import.
    # The start page belongs to chapter 521, so pass that number explicitly;
    # with the default chapter=1 an empty 001.cbz is created first.
    get_chapter('http://www.mangareader.net', '/naruto/521', 'Naruto', 521)
view raw mangareader.py hosted with ❤ by GitHub
So, I let it rip and now I'm busy reading... :)

No comments: