Skip to content

Commit

Permalink
svgshot (#125)
Browse files Browse the repository at this point in the history
* init

* it works :)

* v1.0.1

* more sane default size

* add todo

* v1.0.3

* use wikipedia as example; add shebang

* bump version

* add some examples, fix cli params

* v1.0.5

* remove vestigial code

* v1.0.7

* close browser always

* v1.0.9

* remove unused deps

* Limit parallel loads

* v1.0.10

* v1.1.0

* add --block

* move svgshot to folder for merging

* Move files around

* merge in svgshot

* formatting fixes

* load inkscape bin

* Hmm

* something weird is breaking it

* Create run.sh

* Attempt bypass fuse issues

* Update run.sh

fix env canonicalisation

Co-authored-by: thomas nj shadwell <thomas.shadwell@googlemail.com>
  • Loading branch information
Zemnmez and TShadwell authored May 24, 2022
1 parent 63ca077 commit ee2c37f
Show file tree
Hide file tree
Showing 25 changed files with 1,277 additions and 18 deletions.
11 changes: 9 additions & 2 deletions bzl/deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

# Install the nodejs "bootstrap" package
# This provides the basic tools for running and packaging nodejs programs in Bazel
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")

def fetch_dependencies():
http_archive(
Expand Down Expand Up @@ -110,6 +109,14 @@ def fetch_dependencies():
],
)

# Inkscape AppImage, fetched as a single executable file and pinned by sha256.
# With downloaded_file_path = "bin" the file is addressable as
# @inkscape_linux//file:bin.
# NOTE(review): the URL goes through the localised (/es/) gallery path --
# presumably equivalent to the non-localised download; confirm it is stable.
http_file(
name = "inkscape_linux",
sha256 = "b7a99b6c0ee2817706e77803643f4a6caf9e35bdec928e963c1d2ae86e5e4beb",
urls = ["https://inkscape.org/es/gallery/item/31669/Inkscape-0a00cf5-x86_64.AppImage"],
executable = True,
downloaded_file_path = "bin",
)

http_archive(
name = "pulumi_cli",
sha256 = "c0e4b0ef05dcc96f2ccd7065afc8e3d6b3b63054fd9978f271a88862664d1547",
Expand Down
17 changes: 17 additions & 0 deletions cc/inkscape/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Local name for the Inkscape AppImage file provided by the external
# `inkscape_linux` repository, visible to every package in this workspace.
alias(
name = "app_image",
actual = "@inkscape_linux//file:bin",
visibility = ["//:__subpackages__"],
)

# Required to bypass FUSE restrictions on bazel.
# Wraps the AppImage in run.sh so callers get a runnable Inkscape target
# without needing to mount the AppImage themselves.
sh_binary(
name = "bin",
srcs = ["run.sh"],
# The AppImage must be in runfiles so run.sh can locate it at runtime.
data = [":app_image"],
# NOTE(review): run.sh appears to locate the AppImage via `rlocation` rather
# than reading $APP_IMAGE -- confirm whether this env entry is still needed.
env = {"APP_IMAGE": "$(location :app_image)"},
visibility = [
"//ts/cmd/svgshot:__subpackages__",
],
# Bash runfiles library that run.sh sources to get `rlocation`.
deps = ["@bazel_tools//tools/bash/runfiles"],
)
15 changes: 15 additions & 0 deletions cc/inkscape/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
#
# Launches the Inkscape AppImage that Bazel placed in this binary's runfiles,
# forwarding every command-line argument to it unchanged.

# --- begin runfiles.bash initialization v2 ---
# Copy-pasted from the Bazel Bash runfiles library v2.
set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
  source "$0.runfiles/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v2 ---

# bypasses FUSE issues on bazel https://github.com/AppImage/AppImageKit/pull/842
#
# Both expansions are quoted: an unquoted $@ re-splits arguments on whitespace
# and glob-expands them (breaking file names with spaces), and an unquoted
# command substitution does the same to the resolved AppImage path.
"$(rlocation inkscape_linux/file/bin)" --appimage-extract-and-run "$@"
6 changes: 4 additions & 2 deletions js/jest/rules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@

load("@npm//jest-cli:index.bzl", "jest", _jest_test = "jest_test")

def jest_test(name, srcs, deps = [], jest_config = "//:jest.ts.config.js", link_workspace_root = True, **kwargs):
def jest_test(name, srcs, data = [], deps = [], jest_config = "//:jest.ts.config.js", link_workspace_root = True, **kwargs):
"A macro around the autogenerated jest_test rule"
templated_args = [
"--no-cache",
"--no-watchman",
"--ci",
"--colors",
"--forceExit",
]
templated_args.extend(["--config", "$(rootpath %s)" % jest_config])
for src in srcs:
templated_args.extend(["--runTestsByPath", "$(rootpath %s)" % src])

data = [jest_config] + srcs + deps + ["//js/jest:jest_reporter_js"]
data = [jest_config] + data + srcs + deps + ["//js/jest:jest_reporter_js"]

_jest_test(
name = name,
data = data,
Expand Down
8 changes: 8 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"@bazel/buildozer": "^5.1.0",
"@bazel/esbuild": "^4.5.0",
"@bazel/ibazel": "latest",
"@bazel/runfiles": "^5.4.2",
"@bazel/typescript": "^4.5.0",
"@fortawesome/fontawesome-svg-core": "^6.1.1",
"@fortawesome/free-solid-svg-icons": "^6.1.1",
Expand All @@ -26,6 +27,9 @@
"@types/node": "^17.0.35",
"@types/react": "17.0.37",
"@types/react-dom": "^17.0.11",
"@types/svgo": "^2.6.3",
"@types/tmp": "^0.2.3",
"commander": "^9.2.0",
"@types/uuid": "^8.3.4",
"@typescript-eslint/eslint-plugin": "^5.26.0",
"@typescript-eslint/parser": "^5.26.0",
Expand All @@ -40,11 +44,15 @@
"eslint-config-next": "12.1.6",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-prettier": "^4.0.0",
"grunt-cli": "^1.4.3",
"http-server": "^14.1.0",
"jest-cli": "^27.4.5",
"jsdom": "^19.0.0",
"mime": "^3.0.0",
"module-alias": "^2.2.2",
"puppeteer": "^13.7.0",
"svgo": "^2.8.0",
"tmp": "^0.2.1",
"prettier": "^2.6.2",
"react-router": "^6.3.0",
"react-router-dom": "^6.3.0",
Expand Down
43 changes: 43 additions & 0 deletions ts/cmd/svgshot/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
load("//:rules.bzl", "jest_test", "nodejs_binary", "ts_project")

# TypeScript sources for the svgshot CLI (index.ts, lib.ts) together with its
# test file, compiled as one project against the npm packages listed in deps.
ts_project(
name = "project",
srcs = [
"index.ts",
"lib.ts",
"svgshot_test.ts",
],
deps = [
"@npm//@bazel/runfiles",
"@npm//@types/jest",
"@npm//@types/node",
"@npm//@types/svgo",
"@npm//@types/tmp",
"@npm//commander",
"@npm//puppeteer",
"@npm//svgo",
"@npm//tmp",
],
)

# Runnable svgshot CLI. Every invocation is given --inkscapeBin pointing at the
# Bazel-provided Inkscape wrapper, which is also shipped in data so it exists
# at runtime.
nodejs_binary(
name = "svgshot",
args = [
"--inkscapeBin",
"$(location //cc/inkscape:bin)",
],
data = [
# NOTE(review): presumably a target generated by the ts_project macro for
# ":project" -- confirm against //:rules.bzl.
":project_ts",
"//cc/inkscape:bin",
],
entry_point = "index.ts",
)

# Jest tests for svgshot, with the Inkscape wrapper available as runtime data.
jest_test(
name = "tests",
# NOTE(review): presumably the compiled output of svgshot_test.ts from
# ":project" -- confirm.
srcs = ["svgshot_test.js"],
data = [
"//cc/inkscape:bin",
],
project_deps = [":project"],
)
87 changes: 87 additions & 0 deletions ts/cmd/svgshot/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
svgshot
=============================================================================
[svgshot]: #svgshot

Svgshot takes 'screenshots' of webpages as minimised SVGs. This makes them
great for rendering in videos or webpages.


Example
-----------------------------------------------------------------------------
[Example]: #example


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bash
svgshot https://en.wikipedia.org
# loading https://en.wikipedia.org
# writing Wikipedia__the_free_encyclopedia.svg
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

[Wikipedia SVG]: ./Wikipedia__the_free_encyclopedia.svg
![Wikipedia SVG]

With `--block` for removing text¹:

[Twitch SVG]: ./Twitch.svg
![Twitch SVG]

[Examples directory]: ./examples
For more examples, take a look at the [Examples directory].

[BLOKK font]: http://www.blokkfont.com/

¹ Originally this was intended to block out text like the [BLOKK font], but
I couldn't do this without creating truly huge SVGs. If you have any ideas
as to how this could be achieved, let me know!

Installation
-----------------------------------------------------------------------------
[Installation]: #installation

With node and `inkscape` installed:
```bash
npm install -g svgshot
```

If you don't have `inkscape` installed on windows, try `scoop`:
```powershell
scoop install inkscape
```

For temporary usage you might want to use `npx`:
```bash
npx svgshot https://en.wikipedia.org
```

TODO
-----------------------------------------------------------------------------
Replace SVG dimensions with viewBox so they don't get weirdly warped when
rendered at the wrong size:
https://gist.github.com/fyrebase/4604f540bc4a329ff3bfde225775d39e

License
-----------------------------------------------------------------------------
[License]: #license


MIT License

Copyright (c) 2019 Zemnmez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/Twitch.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/Wikipedia__the_free_encyclopedia.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
147 changes: 147 additions & 0 deletions ts/cmd/svgshot/dist/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/usr/bin/env node
"use strict";
/**
 * CommonJS/ES-module interop helper for default imports.
 *
 * A module already flagged as an ES module is returned untouched; anything
 * else is wrapped so that it is reachable through a `default` property.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
/**
 * CommonJS/ES-module interop helper for namespace (`import * as x`) imports.
 *
 * ES modules pass through unchanged. For anything else a fresh namespace
 * object is built from the module's own enumerable properties, with the
 * module itself attached as `default`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            // Copy own properties only; inherited ones are skipped.
            if (Object.hasOwnProperty.call(mod, key)) {
                namespace[key] = mod[key];
            }
        }
    }
    namespace["default"] = mod;
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
const puppeteer_1 = __importDefault(require("puppeteer"));
const tmp = __importStar(require("tmp"));
const child_process_1 = require("child_process");
const svgo_1 = __importDefault(require("svgo"));
const fs_1 = require("fs");
const util_1 = require("util");
// SVGO plugin configuration, in application order. Each table row becomes a
// single-key object `{ pluginName: enabled }`, which is the shape the SVGO
// constructor is given in this file.
const svgoPlugins = [
    ['cleanupAttrs', true],
    ['removeDoctype', true],
    ['removeXMLProcInst', true],
    ['removeComments', true],
    ['removeMetadata', true],
    ['removeTitle', true],
    ['removeDesc', true],
    ['removeUselessDefs', true],
    ['removeEditorsNSData', true],
    ['removeEmptyAttrs', true],
    ['removeHiddenElems', true],
    ['removeEmptyText', true],
    ['removeEmptyContainers', true],
    ['removeViewBox', false],
    ['cleanupEnableBackground', true],
    ['convertColors', true],
    ['convertPathData', true],
    ['convertTransform', true],
    ['removeUnknownsAndDefaults', true],
    ['removeNonInheritableGroupAttrs', true],
    ['removeUselessStrokeAndFill', true],
    ['removeUnusedNS', true],
    ['cleanupIDs', true],
    ['cleanupNumericValues', true],
    ['moveElemsAttrsToGroup', true],
    ['moveGroupAttrsToElems', true],
    ['collapseGroups', true],
    ['removeRasterImages', false],
    ['mergePaths', true],
    ['convertShapeToPath', true],
    ['sortAttrs', true],
].map(([plugin, enabled]) => ({ [plugin]: enabled }));
// Command-line interface definition. Option values are read off `program`
// after parsing; positional arguments (the URLs) end up in `args`.
const program = require('commander');
// Values supplied on the command line arrive as strings, whereas the defaults
// below are numbers. Coerce so downstream code always sees a number (the PDF
// render scale in particular is forwarded to puppeteer as-is). Wrapped rather
// than passing a parser directly because commander calls the processor with
// (value, previousValue).
const toNumber = (value) => Number(value);
program
    .name("svgshot")
    .usage("<urls...>")
    .description('take svg screenshots of webpages. requires the inkscape cli tool')
    .option('-s, --scale <scale>', 'scale of the render. must be between 1 and 2', toNumber, 1)
    .option('--no-background', 'do not render backgrounds')
    .option('--width <width>', 'Width; using px, mm or in (as though printed)', '1000px')
    .option('--height <height>', 'Height; using px, mm or in (as though printed)', '1000px')
    .option('--media <media>', 'CSS @page media', 'screen')
    .option('--timeout <milliseconds>', 'Maximum time to wait for page to become idle before taking screenshot', toNumber, 10000)
    .option('--throttle <n>', 'Maximum number of pages to load at once. set to `1` for sequential operation', toNumber, 10)
    .option('--block', "make text invisible for presentation (it's still in the file though)", false)
    .option('--headful', "run in a visible chromium instance (useful for debugging). also implicitly retains the chromium instance", false);
program.parse(process.argv);
// `throttle` is renamed on the way out because a helper of the same name is
// defined further down in this file.
const { background, width, height, media, scale, timeout, throttle: throttleN, block, headful } = program;
const args = program.args;
// Only the two media types accepted here are allowed; fail fast otherwise.
const isValidMedia = (s) => s == "screen" || s == "print";
if (!isValidMedia(media))
    throw new Error(`invalid media type ${media}; must be "screen" or "print"`);
/**
 * Lazily applies `f` to every item of a sync or async iterable.
 *
 * `f` may itself be a promise for a function. It is called with the item and
 * its zero-based position, and each result is yielded in order.
 */
const map = async function* (f, iter) {
    let index = 0;
    for await (const item of iter) {
        const fn = await f;
        yield fn(item, index++);
    }
};
/**
 * Groups the items of a sync or async iterable into arrays of `size` items.
 *
 * `size` may be a number, a numeric string, or a promise for either. The last
 * array may be shorter than `size` when the item count is not an exact
 * multiple; previously those trailing items were silently dropped.
 */
const chunk = (size) => (iter) => (async function* () {
    let bucket = [];
    for await (const value of iter) {
        bucket.push(value);
        // Loose equality on purpose: `size` can arrive as a string from the CLI.
        if (bucket.length == await size) {
            yield bucket;
            bucket = [];
        }
    }
    // Flush whatever is left over once the source is exhausted.
    if (bucket.length > 0)
        yield bucket;
})();
/**
 * Turns an iterable of possibly-pending values into an async iterable of
 * their settled values, preserving order.
 */
const EventuallyIterable = async function* (I) {
    for await (const pending of I) {
        const settled = await pending;
        yield settled;
    }
};
/**
 * Lazily consumes an iterable of promises in groups of `N`, yielding for each
 * group the array of its resolved values (via `Promise.all`).
 */
const chunkedPromise = (N) => (I) => {
    const groups = chunk(N)(I);
    return map((group) => Promise.all(group), groups);
};
/**
 * Flattens one level of nesting: yields every member of every (sync or async)
 * iterable produced by `I`, in order.
 */
const flat = async function* (I) {
    for await (const group of I) {
        for await (const item of group) {
            yield item;
        }
    }
};
/**
 * Lazily drains the given async iterable in groups of `N`, re-emitting the
 * settled members one at a time.
 */
const throttle = (N) => (I) => {
    const groups = chunkedPromise(N)(I);
    const settledGroups = EventuallyIterable(groups);
    return flat(settledGroups);
};
/** Resolves with the path of a newly created temporary file ending in `postfix`. */
const createTmpFile = (postfix) => new Promise((ok, err) => {
    tmp.file({ postfix }, (error, path) => {
        if (error)
            return err(error);
        return ok(path);
    });
});
/**
 * Renders every URL in `args` to an optimised SVG in the current directory.
 *
 * For each URL: load the page in Chromium, optionally hide its text
 * (`--block`), print it to a temporary PDF, convert the PDF to SVG with the
 * `inkscape` CLI, run the result through SVGO and write `<page title>.svg`.
 *
 * Rejects if any step fails (a navigation timeout is tolerated). Unless
 * `--headful` was given the browser is closed even when a capture fails.
 */
const main = async () => {
    const browser = await puppeteer_1.default.launch({
        headless: !headful,
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu'] // unfortunate, but needed to work with wsl...
    });
    try {
        const captures = map(async (url, i) => {
            console.warn("loading", url);
            const loading = setInterval(() => {
                console.warn("still loading", url);
            }, timeout / 2);
            let page;
            try {
                page = await browser.newPage();
                try {
                    await page.goto(url, {
                        waitUntil: 'networkidle2',
                        timeout
                    });
                }
                catch (e) {
                    // if the network doesn't go idle, we still take the screenshot
                }
            }
            finally {
                // Stop the progress ticker even when the page could not be opened.
                clearInterval(loading);
            }
            if (block) {
                const injecting = setInterval(() => {
                    console.warn("waiting for injected style...", url);
                }, timeout / 2);
                try {
                    await page.evaluate(() => {
                        const s = document.createElement("style");
                        s.innerHTML = `* { color: transparent !important }`;
                        document.head.appendChild(s);
                        /*
                        const d = window.document;
                        const y = d.createTreeWalker(d.body, 4);
                        for(;y.nextNode();y.currentNode.textContent=y.currentNode!.textContent!.replace(/\S/g, '…'));
                        */
                    });
                }
                finally {
                    clearInterval(injecting);
                }
            }
            await page.emulateMediaType(media);
            const pdf = await page.pdf({
                scale: scale,
                printBackground: background,
                width: width,
                height: height,
                margin: { top: 0, right: 0, left: 0, bottom: 0 }
            });
            const [pdfFile, svgFile] = await Promise.all(['.pdf', '.svg'].map(createTmpFile));
            await util_1.promisify(fs_1.writeFile)(pdfFile, pdf);
            // Pass arguments as an array (no shell) so temporary paths containing
            // spaces or shell metacharacters reach inkscape intact.
            const inkscapeArgs = ['--without-gui', pdfFile, '--export-plain-svg', svgFile];
            const line = ['inkscape', ...inkscapeArgs].join(' ');
            try {
                await util_1.promisify(child_process_1.execFile)('inkscape', inkscapeArgs);
            }
            catch (e) {
                throw new Error(`failed to run ${line} with ${e} -- make sure you have inkscape installed and in your PATH`);
            }
            const svgo = new svgo_1.default({
                plugins: svgoPlugins
            });
            // Anything other than ASCII letters, '_' and '-' becomes '_'. The
            // range is spelled A-Za-z because `A-z` also admits [ \ ] ^ and `,
            // which would let a backslash through into the file name.
            const title = ((await page.title()).trim() || page.url()).replace(/[^A-Za-z_-]/g, "_");
            const fileName = title + ".svg";
            const svgContents = await util_1.promisify(fs_1.readFile)(svgFile, 'utf8');
            const optimSvg = await svgo.optimize(svgContents.toString(), { path: svgFile });
            console.warn(`writing ${i + 1}/${args.length} ${fileName} (${width} x ${height})`);
            await util_1.promisify(fs_1.writeFile)(fileName, optimSvg.data);
        }, args);
        // Drain the pipeline; the captures run as a side effect of iteration.
        for await (let _ of throttle(throttleN)(captures))
            ;
    }
    finally {
        // Headful runs deliberately keep the browser open for inspection.
        if (!headful)
            await browser.close();
    }
};
main().catch(e => { console.error(e); process.exit(1); }).then(() => process.exit(0));
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Apple.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Hacker_News.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Home_-_BBC_News.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Music_for_everyone_-_Spotify.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Twitch.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/Where_work_happens___Slack.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/YouTube.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions ts/cmd/svgshot/examples/______zemnmez__on_Twitter.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit ee2c37f

Please sign in to comment.