diff --git a/.gitignore b/.gitignore
index 79e70eb..d500870 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ purs/generated-docs
tmp
.cache
tags
+purs/docset/purescript-local.docset/
diff --git a/purs/docset/gen-docset.py b/purs/docset/gen-docset.py
index e2c9a91..8d14825 100755
--- a/purs/docset/gen-docset.py
+++ b/purs/docset/gen-docset.py
@@ -11,88 +11,104 @@ from shutil import copyfile
from html import unescape
from bs4 import BeautifulSoup
-def fatal(msg):
- print(msg, file=sys.stderr)
- sys.exit(1)
-
-class URLUtilities:
- HTML = '../generated-docs/'
- INDEX = HTML + 'index.html'
-
-class HTMLUtilities:
- @staticmethod
- def find_modules(html):
- return re.findall(r'
([^<]+)', html)
-
class Generator:
+ GENERATED_DOCS = '../generated-docs'
OUTPUT = 'purescript-local.docset'
- def __init__(self):
- self.version = None
+ def documents_path(self, *paths):
+ return os.path.join(self.OUTPUT, 'Contents/Resources/Documents', *paths)
def generate(self):
self.create_docset()
self.create_index()
self.save_assets()
- modules = self.fetch_index()
+ print('Processing index')
+ modules = self.process_index()
print('Processing {} modules'.format(len(modules)))
for module in modules:
- self.fetch_module(module)
+ print('Processing module {}'.format(module))
+ self.process_module(module)
self.db.close()
self.create_plist()
print('Done')
- def fetch_index(self):
- self.version = None
- print('Processing module list')
- with open(URLUtilities.INDEX, 'r') as f:
- r = f.read()
- html = re.sub(r'()', r'\1', r) # fix html error
- self.save_html(html, self.documents_path('index.html'))
- modules = HTMLUtilities.find_modules(html)
- return modules
+ def save_assets(self):
+ copyfile('favicon-16x16.png', self.documents_path('../../../icon.png'))
+ copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png'))
- @staticmethod
- def create_docset():
- path = Generator.OUTPUT
+ def create_docset(self):
+ path = self.OUTPUT
if os.path.exists(path):
shutil.rmtree(path)
- os.makedirs(Generator.documents_path())
+ os.makedirs(self.documents_path())
- def fetch_module(self, module):
- print('Processing module {}'.format(module))
+ def create_index(self):
+ self.db = sqlite3.connect(self.documents_path('../docSet.dsidx'))
+ self.cursor = self.db.cursor()
+ self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
+ self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
+
+ def create_plist(self):
+ with open('Info.plist.in', 'r') as f:
+ plist = f.read()
+ with open(self.documents_path('../../Info.plist'), 'w') as f:
+ f.write(plist)
+
+ def find_modules(self, html):
+ return re.findall(r'([^<]+)', html)
+
+ def process_index(self):
+ with open('{}/{}'.format(self.GENERATED_DOCS, 'index.html'), 'r') as f:
+ i = f.read()
+ i_html = re.sub(r'()', r'\1', i) # fix html error
+ self.save_html(i_html, self.documents_path('index.html'))
+ return self.find_modules(i_html)
+
+ def process_module(self, module):
moduleFile = urllib.parse.quote(module, '') + '.html'
- with open('{}/{}'.format(URLUtilities.HTML, moduleFile), 'r') as f:
- r = f.read()
- html = self.save_html(r, self.documents_path(moduleFile))
- self.cursor.execute(
- 'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
- [module, 'Module', moduleFile])
+ with open('{}/{}'.format(self.GENERATED_DOCS, moduleFile), 'r') as f:
+ m_html = f.read()
+ self.save_html(m_html, self.documents_path(moduleFile))
+ self.insert_search_index(module, 'Module', moduleFile)
self.db.commit()
- return r
-
- @staticmethod
- def documents_path(*paths):
- return os.path.join(Generator.OUTPUT, 'Contents/Resources/Documents', *paths)
def save_html(self, html, path):
- prefix = r''
soup = BeautifulSoup(html, 'html.parser')
# remove google font
soup.find('link', href=re.compile(r'^https://fonts\.googleapis\.com/.*')).decompose()
# remove top banner
soup.find('div', class_='top-banner').decompose()
+ # remove sidebar
aside = soup.find('div', class_='col--aside')
- if(aside):
- aside.decompose()
+ if aside: aside.decompose()
# find anchors
- tlds = soup.find_all('div', class_='decl')
- for tld in tlds:
- self.process_decl(path, tld, soup)
+ top_decls = soup.find_all('div', class_='decl')
+ for top_decl in top_decls:
+ self.process_decl(path, top_decl, soup)
with open(path, 'w') as f:
f.write(str(soup))
def process_decl(self, path, decl, soup, type_hint = None):
+ type_, name = self.get_decl_type(decl, type_hint)
+
+ anchor_toc = self.to_anchor_toc(type_, name)
+ decl.insert(0, soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' }))
+
+ self.insert_search_index(name, type_,
+ '{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc))
+
+ if type_ == 'Class':
+ members_lbl = decl.find('h4', text='Members')
+ if members_lbl:
+ for member_decl in members_lbl.find_next_sibling().find_all('li', recursive=False):
+ self.process_decl(path, member_decl, soup, 'Function')
+ elif type_ == 'Type':
+ ctors_lbl = decl.find('h4', text='Constructors')
+ if ctors_lbl:
+ for ctor_decl in ctors_lbl.find_next_sibling().find_all('li', recursive=False):
+ self.process_decl(path, ctor_decl, soup, 'Constructor')
+
+ def get_decl_type(self, decl, type_hint = None):
type_, name = decl.get('id').split(':', 1)
name = unescape(name)
if type_hint:
@@ -103,36 +119,16 @@ class Generator:
if signature:
if signature.code.find() == signature.code.find('span', class_='keyword', text='class'):
type_ = 'Class'
- anchor_toc = '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, ''))
- self.cursor.execute(
- 'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
- [name, type_, '{}#{}'.format(os.path.relpath(path, self.documents_path()), anchor_toc)])
- a = soup.new_tag('a', attrs={ 'name': anchor_toc, 'class': 'dashAnchor' })
- decl.insert(0, a)
- if type_ == 'Class':
- members_lbl = decl.find('h4', text='Members')
- if members_lbl:
- for member in members_lbl.find_next_sibling().find_all('li', recursive=False):
- self.process_decl(path, member, soup, 'Function')
- elif type_ == 'Type':
- ctors_lbl = decl.find('h4', text='Constructors')
- if ctors_lbl:
- for ctor in ctors_lbl.find_next_sibling().find_all('li', recursive=False):
- self.process_decl(path, ctor, soup, 'Constructor')
+ return type_, name
- def save_assets(self):
- copyfile('favicon-16x16.png', self.documents_path('../../../icon.png'))
- copyfile('favicon-32x32.png', self.documents_path('../../../icon@2x.png'))
+ def to_anchor_toc(self, type_, name):
+ return '//apple_ref/cpp/{}/{}'.format(urllib.parse.quote(type_, ''), urllib.parse.quote(name, ''))
- @staticmethod
- def create_plist():
- with open('Info.plist.in', 'r') as f:
- plist = f.read()
- with open(Generator.documents_path('../../Info.plist'), 'w') as f:
- f.write(plist)
-
- @staticmethod
- def convert_type(t):
+ def insert_search_index(self, name, type_, path):
+ self.cursor.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?);',
+ [name, type_, path])
+
+ def convert_type(self, t):
TABLE = {
't': 'Type',
'v': 'Function',
@@ -140,12 +136,6 @@ class Generator:
}
return TABLE[t] if t in TABLE else t
- def create_index(self):
- self.db = sqlite3.connect(self.documents_path('../docSet.dsidx'))
- self.cursor = self.db.cursor()
- self.cursor.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
- self.cursor.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
-
if __name__ == '__main__':
gen = Generator()
gen.generate()