# ssg/generate.py

import re
from dataclasses import dataclass
from html import escape
from html.parser import HTMLParser
from os import path
from pathlib import Path

from PIL import Image

# Image optimization settings used by SSGParser.
# Directory that an <img> "src" value is joined onto to locate the original image.
INPUT_SOURCE = "./politecafe/input"
# Directory the resized WebP variants are written to.
OUTPUT_TARGET = "./politecafe/images"

@dataclass
class Article:
    """An article stored as an HTML fragment on disk."""

    # Filesystem path of the HTML fragment.
    path: str

    def read_contents(self) -> str:
        """Return the article's text, decoded as UTF-8.

        The encoding is pinned so the result does not depend on the
        platform's default locale encoding.

        Raises:
            OSError: if the file cannot be opened or read.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        return Path(self.path).read_text(encoding="utf-8")


class SSGParser(HTMLParser):
    """Re-serialize HTML, swapping each ``<img src>`` for a generated ``srcset``.

    Feed markup with ``feed()`` (and finish with ``close()``); the rewritten
    markup accumulates in ``output``. Start tags, end tags and text are echoed
    back. For every ``<img>`` a ``sizes`` attribute is added and the ``src``
    attribute is replaced by a ``srcset`` that points at resized WebP copies
    written to ``OUTPUT_TARGET``, one per entry in ``scales``.

    This class defines no handlers for comments or declarations, so they are
    not echoed into ``output``.
    """

    # Elements that never take a closing tag; "<hr/>" must not become "<hr></hr>".
    VOID_ELEMENTS = frozenset(
        {
            "area", "base", "br", "col", "embed", "hr", "img",
            "input", "link", "meta", "source", "track", "wbr",
        }
    )
    # Elements whose text is passed through verbatim rather than HTML-escaped.
    RAW_TEXT_ELEMENTS = frozenset({"script", "style"})

    def __init__(self):
        super().__init__()
        # Scale factors applied to each image's width and height.
        self.scales = [0.6, 1]
        # Rewritten markup produced so far.
        self.output = ""
        # Name of the currently open <script>/<style> element, if any.
        self._raw_text_tag = None

    def handle_endtag(self, tag):
        """Echo a closing tag, except for void elements."""
        if tag == self._raw_text_tag:
            self._raw_text_tag = None
        if tag not in self.VOID_ELEMENTS:
            self.output += "</" + tag + ">"

    def handle_data(self, data):
        """Echo text content, re-escaping ``&``, ``<`` and ``>``.

        The base parser hands over text with character references already
        decoded, so ``&`` has to be escaped again as well; otherwise a source
        ``&amp;lt;`` would be emitted as ``&lt;`` and render as ``<``.
        Text inside ``<script>``/``<style>`` is emitted untouched.
        """
        if self._raw_text_tag is None:
            data = escape(data, quote=False)
        self.output += data

    def handle_starttag(self, tag, attrs):
        """Echo a start tag; ``<img>`` tags get ``sizes`` and ``srcset``.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for a valueless attribute.
        """
        if tag in self.RAW_TEXT_ELEMENTS:
            self._raw_text_tag = tag
        if tag == "img":
            attrs = self._responsive_img_attrs(attrs)
        rendered = "".join(self._format_attr(name, value) for name, value in attrs)
        self.output += "<" + tag + rendered + ">"

    @staticmethod
    def _format_attr(name, value):
        """Render one attribute with a leading space, escaping its value."""
        if value is None:
            # Valueless attribute such as "disabled": emit the bare name.
            return " " + name
        return ' {}="{}"'.format(name, escape(value, quote=True))

    def _responsive_img_attrs(self, attrs):
        """Return ``attrs`` with ``sizes`` prepended and ``src`` turned into ``srcset``."""
        new_attrs = [("sizes", "(min-width: 768px) 25vw, 35vw")]
        for name, value in attrs:
            if name == "src" and value:
                new_attrs.append(("srcset", self._write_scaled_copies(value)))
            else:
                # Includes a valueless/empty src, which names no image to resize.
                new_attrs.append((name, value))
        return new_attrs

    def _write_scaled_copies(self, src):
        """Write one WebP copy of ``src`` per scale and return the ``srcset`` value."""
        img_path = path.join(INPUT_SOURCE, src)
        img_name = Path(img_path).stem
        Path(OUTPUT_TARGET).mkdir(parents=True, exist_ok=True)

        src_set = []
        # The context manager closes the source file once all copies are written.
        with Image.open(img_path) as img:
            width, height = img.size
            for scale in self.scales:
                new_width = int(width * scale)
                new_height = int(height * scale)
                destination_path = path.join(
                    OUTPUT_TARGET, f"{img_name}{new_width}{new_height}.webp"
                )
                result = img.resize(size=(new_width, new_height))
                # NOTE(review): height/width are kept from the original call;
                # presumably the WebP writer ignores them - confirm.
                result.save(
                    fp=destination_path,
                    format="WEBP",
                    height=new_height,
                    width=new_width,
                )
                # The srcset URL is expressed relative to the site root directory.
                candidate = path.relpath(destination_path, "politecafe")
                src_set.append(f"{candidate} {new_width}w")
        return ",".join(src_set)


# Articles to publish, listed in the order they are processed.
articles = [
    Article(path=source)
    for source in (
        "./politecafe/input/tech-002.html",
        "./politecafe/input/art-002.html",
        "./politecafe/input/art-001.html",
        "./politecafe/input/tech-001.html",
    )
]

# Render every article through SSGParser and build a link ("slug") to it.
# An article is kept only if its rendered markup contains an <h2 id="...">
# heading whose title can be extracted; otherwise a message is printed and
# the article is skipped.
slugs = []
article_contents = []
for article in articles:
    parser = SSGParser()
    parser.feed(article.read_contents())
    # feed() may hold back trailing text; close() forces it to be processed.
    parser.close()
    contents = parser.output

    id_match = re.search(r'<h2 id="(.*?)"', contents)
    title_match = re.search(r'<h2 id=".*?>(.*?)</h2>', contents)
    if id_match is None:
        print("couldn't match id")
        continue
    if title_match is None:
        print("Couldn't match title")
        continue

    # Named heading_id rather than id so the builtin id() is not shadowed.
    heading_id = id_match.group(1)
    title = title_match.group(1)
    article_contents.append(contents)
    slug = f"""<p><a href="#{heading_id}">{title}</a></p>"""
    slugs.append(slug)

# Fill the template's placeholders with the rendered articles and their
# slug links, then write the result to index.html.
# The template is read as UTF-8 to match the encoding used for the output,
# instead of relying on the platform's default encoding.
template = Path("./politecafe/input/template.html").read_text(encoding="utf-8")

template = template.replace(
    "<div><!--- Articles ---></div>", "\n".join(article_contents)
)
template = template.replace("<div><!--- Slugs ---></div>", "\n".join(slugs))
with open("./politecafe/index.html", "w", encoding="utf-8") as index_html:
    index_html.write(template)