Hi squeek502 - thanks for that link!
I’ll check it out and go from there.
Cheers - thanks again -
Update: I looked over autodocs and it's pretty intimidating; the learning curve looks steep.
In the meantime I do now have some Python code which downloads all of the std library documentation (one page for each link), creates an index.html file with hyperlinks in a left-hand pane (similar to how Python's docs appear), and puts the files in a zip file. I've also got a reformat.py program which loads each of the HTML files and tries to reformat it. I'll put both files here in case anyone wants to try converting them to Zig and enhancing them.
( Confession - both bits of code come from ChatGPT. )
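(To run them you'll need beautifulsoup4 and playwright installed - pip install beautifulsoup4 playwright, then playwright install chromium to fetch the browser Playwright drives - and you'll want to change the hard-coded /home/andy/docs2html path near the top of each script to somewhere on your own machine.)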
# download_zig_docs.py
# This code is released to the public domain.
# "Share and enjoy....." :)
from pathlib import Path
from urllib.parse import urljoin, urlparse
from collections import defaultdict
import re
import zipfile

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
BASE_URL = "https://ziglang.org/documentation/master/std/"
OUTPUT_DIR = Path("/home/andy/docs2html")
ZIP_FILENAME = "zig_docs_pages.zip"
INDEX_HTML = "index.html"
def main():
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Get (url, link_text) pairs from the main docs page.
    links_info = get_links_with_playwright(BASE_URL)
    if not links_info:
        print("No links found. The documentation may not have loaded as expected.")
        return

    # Create filenames from link text.
    url_to_filename = create_filenames_for_links(links_info)

    # Download each page by using Playwright to load it and capture its
    # rendered content. This attempts to get the content as if the link
    # had been "clicked".
    render_and_save_pages(links_info, url_to_filename)

    # Create the ZIP of all downloaded pages.
    zip_file_path = OUTPUT_DIR / ZIP_FILENAME
    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for url, fname in url_to_filename.items():
            zf.write(OUTPUT_DIR / fname, arcname=fname)

    # Create index.html.
    index_file_path = OUTPUT_DIR / INDEX_HTML
    index_html = build_index_html(list(url_to_filename.values()))
    index_file_path.write_text(index_html, encoding='utf-8')

    print("Done! The ZIP and index.html are in", OUTPUT_DIR)
    print("Pages found:", len(url_to_filename))
def get_links_with_playwright(base_url):
    """Use Playwright to render the page and extract all documentation links
    along with their text."""
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        page.goto(base_url)
        # Wait to let the autodoc JavaScript populate the page. A fixed
        # timeout is crude; waiting on a known selector would be more robust.
        page.wait_for_timeout(5000)
        html_content = page.content()
        browser.close()

    soup = BeautifulSoup(html_content, 'html.parser')
    found = []
    seen = set()
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href'].strip()
        text = a_tag.get_text(strip=True)
        if not text:
            # If there's no visible text, fall back to the URL path.
            text = urlparse(href).path.strip('/') or "index"
        full_url = urljoin(base_url, href)
        # Keep only in-scope links, and skip URLs we've already seen so the
        # same page isn't downloaded (and suffix-numbered) more than once.
        if full_url.startswith(BASE_URL) and full_url not in seen:
            seen.add(full_url)
            found.append((full_url, text))
    return found
def create_filenames_for_links(links_info):
    """
    Given a list of (url, link_text), create unique filenames from the link text.
    If multiple links have the same text, add a numeric suffix.
    """
    text_counts = defaultdict(int)
    url_to_filename = {}
    for url, link_text in links_info:
        # Sanitize the link text so it is filename-safe.
        base_name = re.sub(r'[^a-zA-Z0-9_-]+', '_', link_text)
        if not base_name:
            base_name = "page"
        text_counts[base_name] += 1
        if text_counts[base_name] > 1:
            fname = f"{base_name}_{text_counts[base_name]}.html"
        else:
            fname = f"{base_name}.html"
        url_to_filename[url] = fname
    return url_to_filename
def render_and_save_pages(links_info, url_to_filename):
    """
    For each link, use Playwright to load that URL and capture the rendered HTML.
    This simulates visiting each link's page and retrieving the content that
    would be shown.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        for url, text in links_info:
            fname = url_to_filename[url]
            print(f"Rendering page for {url} -> {fname}")
            page.goto(url)
            # Wait some time for JS to load the relevant content.
            # You might need to adjust this or wait on a specific selector instead.
            page.wait_for_timeout(3000)
            rendered_content = page.content()
            (OUTPUT_DIR / fname).write_text(rendered_content, encoding='utf-8')
        browser.close()
# Build the index page.
def build_index_html(filenames):
    """Build index.html with a two-pane layout and a search bar at the top
    of the left pane."""
    filenames.sort()
    nav_items = []
    for fname in filenames:
        display_name = fname
        if display_name.endswith(".html"):
            display_name = display_name[:-5]  # remove the .html extension from the displayed text
        nav_items.append(f'<li><a href="{fname}" target="main">{display_name}</a></li>')
    nav_list = "\n".join(nav_items)
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>Zig Docs</title>
<style>
body {{
    margin: 0;
    display: flex;
    height: 100vh;
    font-family: sans-serif;
}}
nav {{
    width: 250px;
    overflow-y: auto;
    border-right: 1px solid #ccc;
    background: #f8f8f8;
    display: flex;
    flex-direction: column;
}}
.search-bar {{
    padding: 1em;
    border-bottom: 1px solid #ccc;
}}
.search-bar input[type="text"] {{
    width: 100%;
    padding: 0.5em;
    box-sizing: border-box;
}}
nav ul {{
    list-style: none;
    padding: 0;
    margin: 0;
    flex: 1; /* allows the list to grow and scroll */
}}
nav li {{
    margin: 0;
    padding: 0.5em;
    word-break: break-all;
}}
nav li:hover {{
    background: #eee;
}}
nav a {{
    text-decoration: none;
    color: #333;
    display: block;
}}
main {{
    flex: 1;
    overflow: hidden;
    display: flex;
    flex-direction: column;
}}
iframe {{
    flex: 1;
    border: none;
    width: 100%;
    height: 100%;
}}
</style>
</head>
<body>
<nav>
    <div class="search-bar">
        <input type="text" placeholder="Search...">
    </div>
    <ul>
{nav_list}
    </ul>
</nav>
<main>
    <iframe name="main"></iframe>
</main>
<script>
    // Wire the search box up to the nav list (simple substring filter).
    document.querySelector('.search-bar input').addEventListener('input', function () {{
        const needle = this.value.toLowerCase();
        document.querySelectorAll('nav li').forEach(function (li) {{
            li.style.display = li.textContent.toLowerCase().includes(needle) ? '' : 'none';
        }});
    }});
</script>
</body>
</html>
"""
    return html
# Run the script.
if __name__ == "__main__":
    main()
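That's the downloader: run it with python download_zig_docs.py, then open index.html in a browser to get the two-pane view. The second script below post-processes each saved page, keeping the header but dropping the sidebar so the pages read more like Python doc pages: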
# reformat.py
# Reformat each Zig doc page to make it look similar
# to a Python documentation page.
# This code is released to the public domain.
# "Share and enjoy....." :)
from pathlib import Path

from bs4 import BeautifulSoup

# Directory containing the HTML files.
DIRECTORY = Path("/home/andy/docs2html")  # Adjust as needed
def main():
    # Process all HTML files except index.html and any previously
    # reformatted output (so re-running the script is safe).
    for file in DIRECTORY.glob("*.html"):
        if file.name == "index.html" or file.name.startswith("reformatted_"):
            continue

        # Parse the existing HTML file.
        soup = BeautifulSoup(file.read_text(encoding='utf-8'), 'html.parser')

        # Find the header (or top area) that we want to keep intact.
        # Adjust this selector based on the actual structure of your pages.
        # Example assumption: the Zig logo and tabs live in a <header> element.
        header = soup.find('header')
        if not header:
            # If no <header> is found, try other likely selectors.
            header = soup.find('div', class_='top-bar') or soup.find('nav')

        # Extract the header markup, then remove it from the tree.
        if header:
            header_html = str(header)
            header.decompose()
        else:
            header_html = ""

        # The remainder of the page content (below the header).
        body = soup.body or soup
        main_content_html = ""
        for element in body.contents:
            if element.name:
                main_content_html += str(element)

        # Build the new HTML without a sidebar.
        new_html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<title>{file.stem}</title>
<style>
body {{
    margin: 0;
    font-family: sans-serif;
}}
header {{
    border-bottom: 1px solid #ccc;
    padding: 1em;
}}
.main-content {{
    padding: 2em;
    line-height: 1.5;
}}
.main-content h1, .main-content h2, .main-content h3 {{
    font-weight: normal;
}}
.main-content p {{
    margin-bottom: 1em;
}}
</style>
</head>
<body>
{header_html}
<div class="main-content">
{main_content_html}
</div>
</body>
</html>
"""

        # Write the modified HTML to a new file.
        output_file = DIRECTORY / f"reformatted_{file.name}"
        output_file.write_text(new_html, encoding='utf-8')
        print(f"Reformatted {file.name} -> {output_file.name}")
if __name__ == "__main__":
    main()
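Note that reformat.py writes its output to new reformatted_*.html files alongside the originals (and skips index.html and its own previous output), so nothing the first script downloaded gets overwritten; you can compare the two versions and delete whichever set you don't want.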