From 5fb235c5311337ce96fb48eb7855d1effd0cb7de Mon Sep 17 00:00:00 2001 From: Jon Schoning Date: Wed, 6 Feb 2019 17:33:01 -0600 Subject: [PATCH] simplify docset script --- .gitignore | 1 + purs/docset/gen-docset.py | 156 ++++++++++++++++++-------------------- 2 files changed, 74 insertions(+), 83 deletions(-) diff --git a/.gitignore b/.gitignore index 79e70eb..d500870 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ purs/generated-docs tmp .cache tags +purs/docset/purescript-local.docset/ diff --git a/purs/docset/gen-docset.py b/purs/docset/gen-docset.py index e2c9a91..8d14825 100755 --- a/purs/docset/gen-docset.py +++ b/purs/docset/gen-docset.py @@ -11,88 +11,104 @@ from shutil import copyfile from html import unescape from bs4 import BeautifulSoup -def fatal(msg): - print(msg, file=sys.stderr) - sys.exit(1) - -class URLUtilities: - HTML = '../generated-docs/' - INDEX = HTML + 'index.html' - -class HTMLUtilities: - @staticmethod - def find_modules(html): - return re.findall(r'
  • ([^<]+)', html) - class Generator: + GENERATED_DOCS = '../generated-docs' OUTPUT = 'purescript-local.docset' - def __init__(self): - self.version = None + def documents_path(self, *paths): + return os.path.join(self.OUTPUT, 'Contents/Resources/Documents', *paths) def generate(self): self.create_docset() self.create_index() self.save_assets() - modules = self.fetch_index() + print('Processing index') + modules = self.process_index() print('Processing {} modules'.format(len(modules))) for module in modules: - self.fetch_module(module) + print('Processing module {}'.format(module)) + self.process_module(module) self.db.close() self.create_plist() print('Done') - def fetch_index(self): - self.version = None - print('Processing module list') - with open(URLUtilities.INDEX, 'r') as f: - r = f.read() - html = re.sub(r'(
  • )', r'\1', r) # fix html error - self.save_html(html, self.documents_path('index.html')) - modules = HTMLUtilities.find_modules(html) - return modules + def save_assets(self): + copyfile('favicon-16x16.png', self.documents_path('../../../icon.png')) + copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png')) - @staticmethod - def create_docset(): - path = Generator.OUTPUT + def create_docset(self): + path = self.OUTPUT if os.path.exists(path): shutil.rmtree(path) - os.makedirs(Generator.documents_path()) + os.makedirs(self.documents_path()) - def fetch_module(self, module): - print('Processing module {}'.format(module)) + def create_index(self): + self.db = sqlite3.connect(self.documents_path('../docSet.dsidx')) + self.cursor = self.db.cursor() + self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);') + self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);') + + def create_plist(self): + with open('Info.plist.in', 'r') as f: + plist = f.read() + with open(self.documents_path('../../Info.plist'), 'w') as f: + f.write(plist) + + def find_modules(self, html): + return re.findall(r'
  • ([^<]+)', html) + + def process_index(self): + with open('{}/{}'.format(self.GENERATED_DOCS, 'index.html'), 'r') as f: + i = f.read() + i_html = re.sub(r'(
  • )', r'\1', i) # fix html error + self.save_html(i_html, self.documents_path('index.html')) + return self.find_modules(i_html) + + def process_module(self, module): moduleFile = urllib.parse.quote(module, '') + '.html' - with open('{}/{}'.format(URLUtilities.HTML, moduleFile), 'r') as f: - r = f.read() - html = self.save_html(r, self.documents_path(moduleFile)) - self.cursor.execute( - 'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);', - [module, 'Module', moduleFile]) + with open('{}/{}'.format(self.GENERATED_DOCS, moduleFile), 'r') as f: + m_html = f.read() + self.save_html(m_html, self.documents_path(moduleFile)) + self.insert_search_index(module, 'Module', moduleFile) self.db.commit() - return r - - @staticmethod - def documents_path(*paths): - return os.path.join(Generator.OUTPUT, 'Contents/Resources/Documents', *paths) def save_html(self, html, path): - prefix = r'' soup = BeautifulSoup(html, 'html.parser') # remove google font soup.find('link', href=re.compile(r'^https://fonts\.googleapis\.com/.*')).decompose() # remove top banner soup.find('div', class_='top-banner').decompose() + # remove sidebar aside = soup.find('div', class_='col--aside') - if(aside): - aside.decompose() + if aside: aside.decompose() # find anchors - tlds = soup.find_all('div', class_='decl') - for tld in tlds: - self.process_decl(path, tld, soup) + top_decls = soup.find_all('div', class_='decl') + for top_decl in top_decls: + self.process_decl(path, top_decl, soup) with open(path, 'w') as f: f.write(str(soup)) def process_decl(self, path, decl, soup, type_hint = None): + type_, name = self.get_decl_type(decl, type_hint) + + anchor_toc = self.to_anchor_toc(type_, name) + decl.insert(0, soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' })) + + self.insert_search_index(name, type_, + '{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc)) + + if type_ == 'Class': + members_lbl = decl.find('h4', text='Members') + if members_lbl: + for member_decl in members_lbl.find_next_sibling().find_all('li', recursive=False): + self.process_decl(path, member_decl, soup, 'Function') + elif type_ == 'Type': + ctors_lbl = decl.find('h4', text='Constructors') + if ctors_lbl: + for ctor_decl in ctors_lbl.find_next_sibling().find_all('li', recursive=False): + self.process_decl(path, ctor_decl, soup, 'Constructor') + + def get_decl_type(self, decl, type_hint = None): type_, name = decl.get('id').split(':', 1) name = unescape(name) if type_hint: @@ -103,36 +119,16 @@ class Generator: if signature: if signature.code.find() == signature.code.find('span', class_='keyword', text='class'): type_ = 'Class' - anchor_toc = '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, '')) - self.cursor.execute( - 'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);', - [name, type_, '{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc)]) - a = soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' }) - decl.insert(0, a) - if type_ == 'Class': - members_lbl = decl.find('h4', text='Members') - if members_lbl: - for member in members_lbl.find_next_sibling().find_all('li', recursive=False): - self.process_decl(path, member, soup, 'Function') - elif type_ == 'Type': - ctors_lbl = decl.find('h4', text='Constructors') - if ctors_lbl: - for ctor in ctors_lbl.find_next_sibling().find_all('li', recursive=False): - self.process_decl(path, ctor, soup, 'Constructor') + return type_, name - def save_assets(self): - copyfile('favicon-16x16.png', self.documents_path('../../../icon.png')) - copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png')) + def to_anchor_toc(self, type_, name): + return '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, '')) - @staticmethod - def create_plist(): - with open('Info.plist.in', 'r') as f: - plist = f.read() - with open(Generator.documents_path('../../Info.plist'), 'w') as f: - f.write(plist) - - @staticmethod - def convert_type(t): + def insert_search_index(self, name, type_, path): + self.cursor.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);', + [name, type_, path]) + + def convert_type(self, t): TABLE = { 't': 'Type', 'v': 'Function', @@ -140,12 +136,6 @@ class Generator: } return TABLE[t] if t in TABLE else t - def create_index(self): - self.db = sqlite3.connect(self.documents_path('../docSet.dsidx')) - self.cursor = self.db.cursor() - self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);') - self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);') - if __name__ == '__main__': gen = Generator() gen.generate()