My static website generator using poole 24KB

  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. # =============================================================================
  4. #
  5. # Poole - A damn simple static website generator.
  6. # Copyright (C) 2012 Oben Sonne <>
  7. #
  8. # This file is part of Poole.
  9. #
  10. # Poole is free software: you can redistribute it and/or modify
  11. # it under the terms of the GNU General Public License as published by
  12. # the Free Software Foundation, either version 3 of the License, or
  13. # (at your option) any later version.
  14. #
  15. # Poole is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
  19. #
  20. # You should have received a copy of the GNU General Public License
# along with Poole. If not, see <http://www.gnu.org/licenses/>.
  22. #
  23. # =============================================================================
  24. from __future__ import with_statement
  25. import codecs
  26. import glob
  27. import imp
  28. import optparse
  29. import os
  30. from os.path import join as opj
  31. from os.path import exists as opx
  32. import re
  33. import shutil
  34. import StringIO
  35. import sys
  36. import traceback
  37. import urlparse
  38. from SimpleHTTPServer import SimpleHTTPRequestHandler
  39. from BaseHTTPServer import HTTPServer
  40. try:
  41. import markdown
  42. except ImportError:
  43. print("abort : need python-markdown, get it from "
  44. "")
  45. sys.exit(1)
  46. # =============================================================================
  47. # Python 2/3 hacks
  48. # =============================================================================
  49. PY3 = sys.version_info[0] == 3
  50. if PY3:
  51. import builtins
  52. exec_ = getattr(builtins, "exec")
  53. else:
  54. import tempfile
  55. def exec_(code, envdic):
  56. with tempfile.NamedTemporaryFile() as tf:
  57. tf.write('# -*- coding: utf-8 -*-\n')
  58. tf.write(code.encode('utf-8'))
  59. tf.flush()
  60. execfile(, envdic)
  61. # =============================================================================
  62. # init site
  63. # =============================================================================
  65. "page.html": """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">
  66. <html xmlns="" lang="en" xml:lang="en">
  67. <head>
  68. <meta http-equiv="Content-Type" content="text/html; charset={{ htmlspecialchars(__encoding__) }}" />
  69. <title>poole - {{ htmlspecialchars(page["title"]) }}</title>
  70. <meta name="description" content="{{ htmlspecialchars(page.get("description", "a poole site")) }}" />
  71. <meta name="keywords" content="{{ htmlspecialchars(page.get("keywords", "poole")) }}" />
  72. <link rel="stylesheet" type="text/css" href="poole.css" />
  73. </head>
  74. <body>
  75. <div id="box">
  76. <div id="header">
  77. <h1>a poole site</h1>
  78. <h2>{{ htmlspecialchars(page["title"]) }}</h2>
  79. </div>
  80. <div id="menu">
  81. <!--%
  82. mpages = [p for p in pages if "menu-position" in p]
  83. mpages.sort(key=lambda p: int(p["menu-position"]))
  84. entry = '<span class="%s"><a href="%s">%s</a></span>'
  85. for p in mpages:
  86. style = p["title"] == page["title"] and "current" or ""
  87. print(entry % (style, htmlspecialchars(p["url"]), htmlspecialchars(p["title"])))
  88. %-->
  89. </div>
  90. <div id="content">{{ __content__ }}</div>
  91. </div>
  92. <div id="footer">
  93. Built with <a href="">Poole</a>
  94. &middot;
  95. Licensed as <a href="">CC-SA</a>
  96. &middot;
  97. <a href="">Validate me</a>
  98. </div>
  99. </body>
  100. </html>
  101. """,
  102. # -----------------------------------------------------------------------------
  103. opj("input", ""): """
  104. title: home
  105. menu-position: 0
  106. ---
  107. ## Welcome to Poole
  108. In Poole you write your pages in [markdown][md]. It's easier to write
  109. markdown than HTML.
  110. Poole is made for simple websites you just want to get done, without installing
  111. a bunch of requirements and without learning a template engine.
  112. In a build, Poole copies every file from the *input* directory to the *output*
  113. directory. During that process every markdown file (ending with *md*, *mkd*,
  114. *mdown* or *markdown*) is converted to HTML using the project's `page.html`
  115. as a skeleton.
  116. [md]:
  117. """,
  118. # -----------------------------------------------------------------------------
  119. opj("input", ""): """
  120. menu-position: 4
  121. ---
  122. Poole has basic support for content generation using Python code inlined in
  123. page files. This is everything but a clear separation of logic and content but
  124. for simple sites this is just a pragmatic way to get things done fast.
  125. For instance the menu on this page is generated by some inlined Python code in
  126. the project's `page.html` file.
  127. Just ignore this feature if you don't need it :)
  128. Content generation by inlined Python code is good to add some zest to your
  129. site. If you use it a lot, you better go with more sophisticated site
  130. generators like [Hyde](
  131. """,
  132. # -----------------------------------------------------------------------------
  133. opj("input", ""): """
  134. menu-position: 3
  135. ---
  136. Every page of a poole site is based on *one global template file*, `page.html`.
  137. All you need to adjust the site layout is to
  138. * edit the page template `page.html` and
  139. * extend or edit the style file `input/poole.css`.
  140. """,
  141. opj("input", ""): """
  142. menu-position: 10
  143. ---
  144. Poole has basic blog support. If an input page's file name has a structure like
  145. ``, e.g. ``,
  146. Poole recognizes the date and post title and sets them as attributes of the
  147. page. These attributes can then be used to generate a list of blog posts:
  148. <!--%
  149. from datetime import datetime
  150. posts = [p for p in pages if "post" in p] # get all blog post pages
  151. posts.sort(key=lambda p: p.get("date"), reverse=True) # sort post pages by date
  152. for p in posts:
  153. date = datetime.strptime(, "%Y-%m-%d").strftime("%B %d, %Y")
  154. print " * **[%s](%s)** - %s" % (, p.url, date) # markdown list item
  155. %-->
  156. Have a look into `input/` to see how it works. Feel free to adjust it
  157. to your needs.
  158. """,
  159. # -----------------------------------------------------------------------------
  160. opj("input", "") : """
  161. ---
  162. ## {{ page["post"] }}
  163. *Posted at
  164. <!--%
  165. from datetime import datetime
  166. print datetime.strptime(page["date"], "%Y-%m-%d").strftime("%B %d, %Y")
  167. %-->*
  168. There is a bank in my eel, your argument is invalid.
  169. More nonsense at <>.
  170. """,
  171. # -----------------------------------------------------------------------------
  172. opj("input", "blog.2010-03-01.I_ate_all the"): """
  173. ## {{ page["post"] }}
  174. *Posted at <!--{ page["date"] }-->.*
  175. What *are* interior crocodile alligators? We just don't know.
  176. More nonsense at <>.
  177. """,
  178. # -----------------------------------------------------------------------------
  179. opj("input", "poole.css"): """
  180. body {
  181. font-family: sans;
  182. width: 800px;
  183. margin: 1em auto;
  184. color: #2e3436;
  185. }
  186. div#box {
  187. border: solid #2e3436 1px;
  188. }
  189. div#header, div#menu, div#content, div#footer {
  190. padding: 1em;
  191. }
  192. div#menu {
  193. background-color: #2e3436;
  194. padding: 0.6em 0 0.6em 0;
  195. }
  196. #menu span {
  197. background-color: #2e3436;
  198. font-weight: bold;
  199. padding: 0.6em;
  200. }
  201. #menu span.current {
  202. background-color: #555753;
  203. }
  204. #menu a {
  205. color: #fefefc;
  206. text-decoration: none;
  207. }
  208. div#footer {
  209. color: gray;
  210. text-align: center;
  211. font-size: small;
  212. }
  213. div#footer a {
  214. color: gray;
  215. text-decoration: none;
  216. }
  217. pre {
  218. border: dotted black 1px;
  219. background: #eeeeec;
  220. font-size: small;
  221. padding: 1em;
  222. }
  223. """
  224. }
  225. def init(project):
  226. """Initialize a site project."""
  227. if not opx(project):
  228. os.makedirs(project)
  229. if os.listdir(project):
  230. print("abort : project dir %s is not empty" % project)
  231. sys.exit(1)
  232. os.mkdir(opj(project, "input"))
  233. os.mkdir(opj(project, "output"))
  234. for fname, content in EXAMPLE_FILES.items():
  235. with open(opj(project, fname), 'w') as fp:
  236. fp.write(content)
  237. print("success: initialized project")
  238. # =============================================================================
  239. # build site
  240. # =============================================================================
  241. MKD_PATT = r'\.(?:md|mkd|mdown|markdown)$'
  242. class Page(dict):
  243. """Abstraction of a source page."""
  244. _template = None # template dictionary
  245. _opts = None # command line options
  246. _pstrip = None # path prefix to strip from (non-virtual) page file names
  247. _re_eom = re.compile(r'^---+ *\r?\n?$')
  248. _re_vardef = re.compile(r'^([^\n:=]+?)[:=]((?:.|\n )*)', re.MULTILINE)
  249. _sec_macros = "macros"
  250. _modmacs = None
  251. def __init__(self, fname, virtual=None, **attrs):
  252. """Create a new page.
  253. Page content is read from `fname`, except when `virtual` is given (a
  254. string representing the raw content of a virtual page).
  255. The filename refers to the page source file. For virtual pages, this
  256. *must* be relative to a projects input directory.
  257. Virtual pages may contain page attribute definitions similar to real
  258. pages. However, it probably is easier to provide the attributes
  259. directly. This may be done using arbitrary keyword arguments.
  260. """
  261. super(Page, self).__init__()
  262. self.update(self._template)
  263. self.update(attrs)
  264. self._virtual = virtual is not None
  265. fname = opj(self._pstrip, fname) if virtual else fname
  266. self["fname"] = fname
  267. self["url"] = re.sub(MKD_PATT, ".html", fname)
  268. self["url"] = self["url"][len(self._pstrip):].lstrip(os.path.sep)
  269. self["url"] = self["url"].replace(os.path.sep, "/")
  270. if virtual:
  271. self.raw = virtual
  272. else:
  273. with, 'r', self._opts.input_enc) as fp:
  274. self.raw = fp.readlines()
  275. # split raw content into macro definitions and real content
  276. vardefs = ""
  277. self.source = ""
  278. for line in self.raw:
  279. if not vardefs and self._re_eom.match(line):
  280. vardefs = self.source
  281. self.source = "" # only macro defs until here, reset source
  282. else:
  283. self.source += line
  284. for key, val in self._re_vardef.findall(vardefs):
  285. key = key.strip()
  286. val = val.strip()
  287. val = re.sub(r' *\n +', ' ', val) # clean out line continuation
  288. self[key] = val
  289. basename = os.path.basename(fname)
  290. fpatt = r'(.+?)(?:\.([0-9]+-[0-9]+-[0-9]+)(?:\.(.*))?)?%s' % MKD_PATT
  291. title, date, post = re.match(fpatt, basename).groups()
  292. title = title.replace("_", " ")
  293. post = post and post.replace("_", " ") or None
  294. self["title"] = self.get("title", title)
  295. if date and "date" not in self: self["date"] = date
  296. if post and "post" not in self: self["post"] = post
  297. self.html = ""
  298. def __getattr__(self, name):
  299. """Attribute-style access to dictionary items."""
  300. try:
  301. return self[name]
  302. except KeyError:
  303. raise AttributeError(name)
  304. def __str__(self):
  305. """Page representation by file name."""
  306. return ('%s (virtual)' % self.fname) if self._virtual else self.fname
  307. # -----------------------------------------------------------------------------
  308. def build(project, opts):
  309. """Build a site project."""
  310. # -------------------------------------------------------------------------
  311. # utilities
  312. # -------------------------------------------------------------------------
  313. def abort_iex(page, itype, inline, exc):
  314. """Abort because of an exception in inlined Python code."""
  315. print("abort : Python %s in %s failed" % (itype, page))
  316. print((" %s raising the exception " % itype).center(79, "-"))
  317. print(inline.encode('utf-8'))
  318. print(" exception ".center(79, "-"))
  319. print(exc)
  320. sys.exit(1)
  321. # -------------------------------------------------------------------------
  322. # regex patterns and replacements
  323. # -------------------------------------------------------------------------
  324. regx_escp = re.compile(r'\\((?:(?:&lt;|<)!--|{)(?:{|%))') # escaped code
  325. repl_escp = r'\1'
  326. regx_rurl = re.compile(r'(?<=(?:(?:\n| )src|href)=")([^#/&%].*?)(?=")')
  327. repl_rurl = lambda m: urlparse.urljoin(opts.base_url,
  328. regx_eval = re.compile(r'(?<!\\)(?:(?:<!--|{){)(.*?)(?:}(?:-->|}))', re.S)
  329. def repl_eval(m):
  330. """Replace a Python expression block by its evaluation."""
  331. expr =
  332. try:
  333. repl = eval(expr, macros.copy())
  334. except:
  335. abort_iex(page, "expression", expr, traceback.format_exc())
  336. else:
  337. if not isinstance(repl, basestring): # e.g. numbers
  338. repl = unicode(repl)
  339. elif not isinstance(repl, unicode):
  340. repl = repl.decode("utf-8")
  341. return repl
  342. regx_exec = re.compile(r'(?<!\\)(?:(?:<!--|{)%)(.*?)(?:%(?:-->|}))', re.S)
  343. def repl_exec(m):
  344. """Replace a block of Python statements by their standard output."""
  345. stmt ="\r\n", "\n")
  346. # base indentation
  347. ind_lvl = len(re.findall(r'^(?: *\n)*( *)', stmt, re.MULTILINE)[0])
  348. ind_rex = re.compile(r'^ {0,%d}' % ind_lvl, re.MULTILINE)
  349. stmt = ind_rex.sub('', stmt)
  350. # execute
  351. sys.stdout = StringIO.StringIO()
  352. try:
  353. exec_(stmt, macros.copy())
  354. except:
  355. sys.stdout = sys.__stdout__
  356. abort_iex(page, "statements", stmt, traceback.format_exc())
  357. else:
  358. repl = sys.stdout.getvalue()[:-1] # remove last line break
  359. sys.stdout = sys.__stdout__
  360. if not isinstance(repl, unicode):
  361. repl = repl.decode(opts.input_enc)
  362. return repl
  363. # -------------------------------------------------------------------------
  364. # preparations
  365. # -------------------------------------------------------------------------
  366. dir_in = opj(project, "input")
  367. dir_out = opj(project, "output")
  368. page_html = opj(project, "page.html")
  369. # check required files and folders
  370. for pelem in (page_html, dir_in, dir_out):
  371. if not opx(pelem):
  372. print("abort : %s does not exist, looks like project has not been "
  373. "initialized" % pelem)
  374. sys.exit(1)
  375. # prepare output directory
  376. for fod in glob.glob(opj(dir_out, "*")):
  377. if os.path.isdir(fod):
  378. shutil.rmtree(fod)
  379. else:
  380. os.remove(fod)
  381. if not opx(dir_out):
  382. os.mkdir(dir_out)
  383. # macro module
  384. fname = opj(opts.project, "")
  385. macros = imp.load_source("macros", fname).__dict__ if opx(fname) else {}
  386. macros["__encoding__"] = opts.output_enc
  387. macros["options"] = opts
  388. macros["project"] = project
  389. macros["input"] = dir_in
  390. macros["output"] = dir_out
  391. # "builtin" functions for use in macros and templates
  392. macros["htmlspecialchars"] = htmlspecialchars
  393. macros["Page"] = Page
  394. # -------------------------------------------------------------------------
  395. # process input files
  396. # -------------------------------------------------------------------------
  397. Page._template = macros.get("page", {})
  398. Page._opts = opts
  399. Page._pstrip = dir_in
  400. pages = []
  401. custom_converter = macros.get('converter', {})
  402. for cwd, dirs, files in os.walk(dir_in.decode(opts.filename_enc)):
  403. cwd_site = cwd[len(dir_in):].lstrip(os.path.sep)
  404. for sdir in dirs[:]:
  405. if, opj(cwd_site, sdir)):
  406. dirs.remove(sdir)
  407. else:
  408. os.mkdir(opj(dir_out, cwd_site, sdir))
  409. for f in files:
  410. if, opj(cwd_site, f)):
  411. pass
  412. elif, f):
  413. page = Page(opj(cwd, f))
  414. pages.append(page)
  415. foo = opj(cwd, f)
  416. bar = opj(dir_out, f)
  417. print('info : copy %s' % bar)
  418. shutil.copyfile(foo, bar)
  419. else:
  420. # either use a custom converter or do a plain copy
  421. for patt, (func, ext) in custom_converter.items():
  422. if, f):
  423. f_src = opj(cwd, f)
  424. f_dst = opj(dir_out, cwd_site, f)
  425. f_dst = '%s.%s' % (os.path.splitext(f_dst)[0], ext)
  426. print('info : convert %s (%s)' % (f_src, func.__name__))
  427. func(f_src, f_dst)
  428. break
  429. else:
  430. src = opj(cwd, f)
  431. try:
  432. shutil.copy(src, opj(dir_out, cwd_site))
  433. except OSError:
  434. # some filesystems like FAT won't allow shutil.copy
  435. shutil.copyfile(src, opj(dir_out, cwd_site, f))
  436. pages.sort(key=lambda p: int(p.get("sval", "0")))
  437. macros["pages"] = pages
  438. # -------------------------------------------------------------------------
  439. # run pre-convert hooks in macro module (named 'once' before)
  440. # -------------------------------------------------------------------------
  441. hooks = [a for a in macros if re.match(r'hook_preconvert_|once_', a)]
  442. for fn in sorted(hooks):
  443. macros[fn]()
  444. # -------------------------------------------------------------------------
  445. # convert pages (markdown to HTML)
  446. # -------------------------------------------------------------------------
  447. for page in pages:
  448. print("info : convert %s" % page)
  449. # replace expressions and statements in page source
  450. macros["page"] = page
  451. out = regx_eval.sub(repl_eval, page.source)
  452. out = regx_exec.sub(repl_exec, out)
  453. # convert to HTML
  454. page.html = markdown.Markdown(extensions=opts.md_ext).convert(out)
  455. # -------------------------------------------------------------------------
  456. # run post-convert hooks in macro module
  457. # -------------------------------------------------------------------------
  458. hooks = [a for a in macros if a.startswith("hook_postconvert_")]
  459. for fn in sorted(hooks):
  460. macros[fn]()
  461. # -------------------------------------------------------------------------
  462. # render complete HTML pages
  463. # -------------------------------------------------------------------------
  464. with, "page.html"), 'r', opts.input_enc) as fp:
  465. skeleton =
  466. for page in pages:
  467. print("info : render %s" % page.url)
  468. # replace expressions and statements in page.html
  469. macros["page"] = page
  470. macros["__content__"] = page.html
  471. out = regx_eval.sub(repl_eval, skeleton)
  472. out = regx_exec.sub(repl_exec, out)
  473. # un-escape escaped python code blocks
  474. out = regx_escp.sub(repl_escp, out)
  475. # make relative links absolute
  476. out = regx_rurl.sub(repl_rurl, out)
  477. # write HTML page
  478. fname = page.fname.replace(dir_in, dir_out)
  479. fname = re.sub(MKD_PATT, ".html", fname)
  480. with, 'w', opts.output_enc) as fp:
  481. fp.write(out)
  482. # -------------------------------------------------------------------------
  483. # remove empty subfolders
  484. # -------------------------------------------------------------------------
  485. removeEmptyFolders(dir_out)
  486. print("success: built project")
  487. def removeEmptyFolders(path):
  488. # remove empty subfolders
  489. files = os.listdir(path)
  490. if len(files):
  491. for f in files:
  492. fullpath = os.path.join(path, f)
  493. if os.path.isdir(fullpath):
  494. removeEmptyFolders(fullpath)
  495. # Dirty OS X Hack
  496. try:
  497. os.remove(os.path.join(path, ".DS_Store"))
  498. except OSError as ex:
  499. pass
  500. # if folder empty, delete it
  501. files = os.listdir(path)
  502. if len(files) == 0:
  503. print "info : removing empty folder: ", path
  504. os.rmdir(path)
  505. # =============================================================================
  506. # serve site
  507. # =============================================================================
  508. def serve(project, port):
  509. """Temporary serve a site project."""
  510. root = opj(project, "output")
  511. if not os.listdir(project):
  512. print("abort : output dir is empty (build project first!)")
  513. sys.exit(1)
  514. os.chdir(root)
  515. server = HTTPServer(('', port), SimpleHTTPRequestHandler)
  516. server.serve_forever()
  517. # =============================================================================
  518. # options
  519. # =============================================================================
  520. def options():
  521. """Parse and validate command line arguments."""
  522. usage = ("Usage: %prog --init [path/to/project]\n"
  523. " %prog --build [OPTIONS] [path/to/project]\n"
  524. " %prog --serve [OPTIONS] [path/to/project]\n"
  525. "\n"
  526. " Project path is optional, '.' is used as default.")
  527. op = optparse.OptionParser(usage=usage)
  528. op.add_option("-i" , "--init", action="store_true", default=False,
  529. help="init project")
  530. op.add_option("-b" , "--build", action="store_true", default=False,
  531. help="build project")
  532. op.add_option("-s" , "--serve", action="store_true", default=False,
  533. help="serve project")
  534. og = optparse.OptionGroup(op, "Build options")
  535. og.add_option("", "--base-url", default="/", metavar="URL",
  536. help="base url for relative links (default: /)")
  537. og.add_option("" , "--ignore", default=r"^\.|~$", metavar="REGEX",
  538. help="input files to ignore (default: '^\.|~$')")
  539. og.add_option("" , "--md-ext", default=[], metavar="EXT",
  540. action="append", help="enable a markdown extension")
  541. og.add_option("", "--input-enc", default="utf-8", metavar="ENC",
  542. help="encoding of input pages (default: utf-8)")
  543. og.add_option("", "--output-enc", default="utf-8", metavar="ENC",
  544. help="encoding of output pages (default: utf-8)")
  545. og.add_option("", "--filename-enc", default="utf-8", metavar="ENC",
  546. help="encoding of file names (default: utf-8)")
  547. op.add_option_group(og)
  548. og = optparse.OptionGroup(op, "Serve options")
  549. og.add_option("" , "--port", default=8080,
  550. metavar="PORT", type="int",
  551. help="port for serving (default: 8080)")
  552. op.add_option_group(og)
  553. opts, args = op.parse_args()
  554. if opts.init + + opts.serve < 1:
  555. op.print_help()
  556. op.exit()
  557. opts.project = args and args[0] or "."
  558. return opts
  559. # =============================================================================
  560. # template helper functions
  561. # =============================================================================
  562. def htmlspecialchars(s):
  563. """
  564. Replace the characters that are special within HTML (&, <, > and ")
  565. with their equivalent character entity (e.g., &amp;). This should be
  566. called whenever an arbitrary string is inserted into HTML (so in most
  567. places where you use {{ variable }} in your templates).
  568. Note that " is not special in most HTML, only within attributes.
  569. However, since escaping it does not hurt within normal HTML, it is
  570. just escaped unconditionally.
  571. """
  572. escape = {
  573. "&": "&amp;",
  574. '"': "&quot;",
  575. ">": "&gt;",
  576. "<": "&lt;",
  577. }
  578. # Look up the translation for every character in s (defaulting to
  579. # the character itself if no translation is available).
  580. return ''.join([escape.get(c,c) for c in s])
  581. # =============================================================================
  582. # main
  583. # =============================================================================
  584. def main():
  585. opts = options()
  586. if opts.init:
  587. init(opts.project)
  588. if
  589. build(opts.project, opts)
  590. if opts.serve:
  591. serve(opts.project, opts.port)
  592. if __name__ == '__main__':
  593. main()