simplify docset script

2019-02-06 17:33:01 -06:00 · 2019-02-06 17:33:01 -06:00 · 5fb235c531
parent 22310a99c4
commit 5fb235c531
2 changed files with 74 additions and 83 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ purs/generated-docs
 tmp
 .cache
 tags
 purs/docset/purescript-local.docset/
--- a/purs/docset/gen-docset.py
+++ b/purs/docset/gen-docset.py
@@ -11,88 +11,104 @@ from shutil import copyfile
 from html import unescape
 from bs4 import BeautifulSoup
 # Report a fatal error: write msg to stderr, then terminate the process
 # with exit status 1 (sys.exit raises SystemExit, so this never returns).
 def fatal(msg):
 	print(msg, file=sys.stderr)
 	sys.exit(1)
 # Path constants for the input HTML. Both are relative paths, so they are
 # resolved against the directory the script is run from.
 class URLUtilities:
 	# directory the HTML pages are read from (note the trailing slash)
 	HTML = '../generated-docs/'
 	# the index page inside that directory
 	INDEX =  HTML + 'index.html'
 # Stateless helpers for pulling data out of raw HTML text.
 class HTMLUtilities:
 	@staticmethod
 	def find_modules(html):
 		# Return, in document order, the link text of every
 		# `<li><a href="...">text</a>` occurrence in html. Only the text
 		# between <a> and </a> is captured; the href itself is discarded.
 		# Callers treat each captured string as a module name.
 		return re.findall(r'<li><a href="[^"]+">([^<]+)</a>', html)
 class Generator:
 	GENERATED_DOCS = '../generated-docs'
 	OUTPUT = 'purescript-local.docset'
-	def __init__(self):
+	def documents_path(self, *paths):
-		self.version = None
+		return os.path.join(self.OUTPUT, 'Contents/Resources/Documents', *paths)
 	def generate(self):
 		self.create_docset()
 		self.create_index()
 		self.save_assets()
-		modules = self.fetch_index()
+		print('Processing index')
 		modules = self.process_index()
 		print('Processing {} modules'.format(len(modules)))
 		for module in modules:
-			self.fetch_module(module)
+			print('Processing module {}'.format(module))
 			self.process_module(module)
 		self.db.close()
 		self.create_plist()
 		print('Done')
-	def fetch_index(self):
+	def save_assets(self):
-		self.version = None
+		copyfile('favicon-16x16.png', self.documents_path('../../../icon.png'))
-		print('Processing module list')
+		copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png'))
 		with open(URLUtilities.INDEX, 'r') as f:
 			r = f.read()
 		html = re.sub(r'(</a></li>)</li>', r'\1', r) # fix html error
 		self.save_html(html, self.documents_path('index.html'))
 		modules = HTMLUtilities.find_modules(html)
 		return modules
-	@staticmethod
+	def create_docset(self):
-	def create_docset():
+		path = self.OUTPUT
 		path = Generator.OUTPUT
 		if os.path.exists(path):
 			shutil.rmtree(path)
-		os.makedirs(Generator.documents_path())
+		os.makedirs(self.documents_path())
-	def fetch_module(self, module):
+	def create_index(self):
-		print('Processing module {}'.format(module))
+		self.db = sqlite3.connect(self.documents_path('../docSet.dsidx'))
 		self.cursor = self.db.cursor()
 		self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
 		self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
 	# Copy the Info.plist.in template (read from the current directory)
 	# unchanged to Info.plist. The target is two levels above the Documents
 	# directory, i.e. <OUTPUT>/Contents/Info.plist.
 	def create_plist(self):
 		with open('Info.plist.in', 'r') as f:
 			plist = f.read()
 		# '../../' climbs from Contents/Resources/Documents up to Contents
 		with open(self.documents_path('../../Info.plist'), 'w') as f:
 			f.write(plist)
 	# Return, in document order, the link text captured from each
 	# `<li><a href="...">text</a>` occurrence in html. process_index uses
 	# the result as the list of module names.
 	def find_modules(self, html):
 		return re.findall(r'<li><a href="[^"]+">([^<]+)</a>', html)
 	# Read index.html from GENERATED_DOCS, repair its markup, write the
 	# processed page into the docset's Documents directory via save_html,
 	# and return the list produced by find_modules on the repaired HTML.
 	def process_index(self):
 		with open('{}/{}'.format(self.GENERATED_DOCS, 'index.html'), 'r') as f:
 			i = f.read()
 		# drop the stray extra '</li>' that follows '</a></li>' in the input
 		i_html = re.sub(r'(</a></li>)</li>', r'\1', i) # fix html error
 		self.save_html(i_html, self.documents_path('index.html'))
 		return self.find_modules(i_html)
 	def process_module(self, module):
 		moduleFile = urllib.parse.quote(module, '') + '.html'
-		with open('{}/{}'.format(URLUtilities.HTML, moduleFile), 'r') as f:
+		with open('{}/{}'.format(self.GENERATED_DOCS, moduleFile), 'r') as f:
-			r = f.read()
+			m_html = f.read()
-		html = self.save_html(r, self.documents_path(moduleFile))
+		self.save_html(m_html, self.documents_path(moduleFile))
-		self.cursor.execute(
+		self.insert_search_index(module, 'Module', moduleFile)
 			'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
 			[module, 'Module', moduleFile])
 		self.db.commit()
 		return r
 	# Build a path rooted at <OUTPUT>/Contents/Resources/Documents.
 	# Any components given in paths are joined on after that root; with no
 	# arguments the Documents directory itself is returned. Callers pass
 	# '..' segments to reach locations above Documents.
 	@staticmethod
 	def documents_path(*paths):
 		return os.path.join(Generator.OUTPUT, 'Contents/Resources/Documents', *paths)
 	def save_html(self, html, path):
 		prefix = r''
 		soup = BeautifulSoup(html, 'html.parser')
 		# remove google font
 		soup.find('link', href=re.compile(r'^https://fonts\.googleapis\.com/.*')).decompose()
 		# remove top banner
 		soup.find('div', class_='top-banner').decompose()
 		# remove sidebar
 		aside = soup.find('div', class_='col--aside')
-		if(aside):
+		if aside: aside.decompose()
 			aside.decompose()
 		# find anchors
-		tlds = soup.find_all('div', class_='decl')
+		top_decls = soup.find_all('div', class_='decl')
-		for tld in tlds:
+		for top_decl in top_decls:
-			self.process_decl(path, tld, soup)
+			self.process_decl(path, top_decl, soup)
 		with open(path, 'w') as f:
 			f.write(str(soup))
 	# Index a single declaration element and, where applicable, its children.
 	#
 	# path      -- output file the page is being written to; stored in the
 	#              index relative to the Documents directory
 	# decl      -- the declaration element (presumably a BeautifulSoup Tag,
 	#              as produced by soup.find_all in save_html -- confirm)
 	# soup      -- the parsed page, used to create the new anchor tag
 	# type_hint -- forwarded to get_decl_type; the recursive calls below pass
 	#              'Function' for class members and 'Constructor' for
 	#              type constructors
 	def process_decl(self, path, decl, soup, type_hint = None):
 		type_, name = self.get_decl_type(decl, type_hint)
 		anchor_toc = self.to_anchor_toc(type_, name)
 		# prepend an <a name=... class="dashAnchor"> marker as the first child of decl
 		decl.insert(0, soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' }))
 		# index entry path is '<file relative to Documents>#<anchor name>'
 		self.insert_search_index(name, type_,
 			'{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc))
 		if type_ == 'Class':
 			# class members: direct <li> children of the element that follows
 			# the 'Members' <h4> heading, each indexed as a 'Function'
 			members_lbl = decl.find('h4', text='Members')
 			if members_lbl:
 				for member_decl in members_lbl.find_next_sibling().find_all('li', recursive=False):
 					self.process_decl(path, member_decl, soup, 'Function')
 		elif type_ == 'Type':
 			# data constructors: same layout under the 'Constructors' heading,
 			# each indexed as a 'Constructor'
 			ctors_lbl = decl.find('h4', text='Constructors')
 			if ctors_lbl:
 				for ctor_decl in ctors_lbl.find_next_sibling().find_all('li', recursive=False):
 					self.process_decl(path, ctor_decl, soup, 'Constructor')
 	def get_decl_type(self, decl, type_hint = None):
 		type_, name = decl.get('id').split(':', 1)
 		name = unescape(name)
 		if type_hint:
@@ -103,36 +119,16 @@ class Generator:
 			if signature:
 				if signature.code.find() == signature.code.find('span', class_='keyword', text='class'):
 					type_ = 'Class'
-		anchor_toc = '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, ''))
+		return type_, name
 		self.cursor.execute(
 			'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
 			[name, type_, '{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc)])
 		a = soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' })
 		decl.insert(0, a)
 		if type_ == 'Class':
 			members_lbl = decl.find('h4', text='Members')
 			if members_lbl:
 				for member in members_lbl.find_next_sibling().find_all('li', recursive=False):
 					self.process_decl(path, member, soup, 'Function')
 		elif type_ == 'Type':
 			ctors_lbl = decl.find('h4', text='Constructors')
 			if ctors_lbl:
 				for ctor in ctors_lbl.find_next_sibling().find_all('li', recursive=False):
 					self.process_decl(path, ctor, soup, 'Constructor')
-	def save_assets(self):
+	def to_anchor_toc(self, type_, name):
-		copyfile('favicon-16x16.png', self.documents_path('../../../icon.png'))
+		return '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, ''))
 		copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png'))
-	@staticmethod
+	def insert_search_index(self, name, type_, path):
-	def create_plist():
+		self.cursor.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
-		with open('Info.plist.in', 'r') as f:
+			[name, type_, path])
-			plist = f.read()
+                
-		with open(Generator.documents_path('../../Info.plist'), 'w') as f:
+	def convert_type(self, t):
 			f.write(plist)
 	@staticmethod
 	def convert_type(t):
 		TABLE = {
 			't': 'Type',
 			'v': 'Function',
@@ -140,12 +136,6 @@ class Generator:
 		}
 		return TABLE[t] if t in TABLE else t
 	# Open the SQLite database docSet.dsidx one level above the Documents
 	# directory (i.e. in <OUTPUT>/Contents/Resources) and create the
 	# searchIndex table. The connection and cursor are kept on self.db and
 	# self.cursor for later inserts.
 	def create_index(self):
 		self.db = sqlite3.connect(self.documents_path('../docSet.dsidx'))
 		self.cursor = self.db.cursor()
 		self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
 		# each (name, type, path) triple may appear only once in the table
 		self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
 # Script entry point: when the file is executed directly (not imported),
 # construct a Generator and run the full build.
 if __name__ == '__main__':
 	gen = Generator()
 	gen.generate()