Initial commit

commit 39d338b9b2
parent 61e4f18e0d
Author: Zuckerberg
Date: 2022-03-29 23:59:19 -04:00
8 changed files with 4139 additions and 0 deletions

composition.nix (new file, 17 lines)

@@ -0,0 +1,17 @@
# This file has been generated by node2nix 1.9.0. Do not edit!
{pkgs ? import <nixpkgs> {
inherit system;
}, system ? builtins.currentSystem, nodejs ? pkgs."nodejs-12_x"}:
let
nodeEnv = import ./node-env.nix {
inherit (pkgs) stdenv lib python2 runCommand writeTextFile writeShellScript;
inherit pkgs nodejs;
libtool = if pkgs.stdenv.isDarwin then pkgs.darwin.cctools else null;
};
in
import ./node-packages.nix {
inherit (pkgs) fetchurl nix-gitignore stdenv lib fetchgit;
inherit nodeEnv;
}
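
For orientation: composition.nix can be imported on its own to build any of the generated packages. A minimal sketch (mercury.nix is a hypothetical file name; the attribute name is the one module.nix uses below), built with nix-build mercury.nix:

# mercury.nix - build one generated package directly (sketch)
let
  pkgs = import <nixpkgs> {};
in (import ./composition.nix { inherit pkgs; })."@postlight/mercury-parser"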

flake.lock (generated file, 43 lines)

@@ -0,0 +1,43 @@
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1648297722,
"narHash": "sha256-W+qlPsiZd8F3XkzXOzAoR+mpFqzm3ekQkJNa+PIh1BQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "0f8662f1319ad6abf89b3380dd2722369fc51ade",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1638239011,
"narHash": "sha256-AjhmbT4UBlJWqxY0ea8a6GU2C2HdKUREkG43oRr3TZg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "a7ecde854aee5c4c7cd6177f54a99d2c1ff28a31",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "21.11",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

flake.nix (new file, 32 lines)

@@ -0,0 +1,32 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/21.11";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils, ... }:
let
supportedSystems = with flake-utils.lib.system; [ x86_64-linux i686-linux aarch64-linux ];
in {
nixosModule = import ./module.nix;
} // flake-utils.lib.eachSystem supportedSystems (system:
{
checks.install =
with import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; };
simpleTest {
machine = { config, pkgs, ... }: {
imports = [ self.nixosModule ];
virtualisation.memorySize = 256;
services.archivebox.enable = true;
};
testScript = ''
machine.wait_for_unit("archivebox")
'';
};
}
);
}
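
For context, a minimal sketch of how a downstream system could consume this flake; the input URL and host name are hypothetical, while nixosModule and services.archivebox.enable come from this repo:

{
  inputs.nixpkgs.url = "github:NixOS/nixpkgs/21.11";
  inputs.archivebox.url = "github:example/archivebox-flake"; # hypothetical URL
  outputs = { self, nixpkgs, archivebox, ... }: {
    nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
      system = "x86_64-linux";
      modules = [
        archivebox.nixosModule
        { services.archivebox.enable = true; }
      ];
    };
  };
}

The checks.install test above exercises the same module in a QEMU VM and runs as part of nix flake check.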

generate.sh (new executable file, 3 lines)

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
rm -f ./node-env.nix
nix run nixpkgs#nodePackages.node2nix -- -i node-packages.json -o node-packages.nix -c composition.nix --no-out-link

module.nix (new file, 442 lines)

@@ -0,0 +1,442 @@
{ pkgs, lib, config, ... }:
with lib;
# TODO pocket integration (POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS)
# TODO fix http timeout?
let
cfg = config.services.archivebox;
archiveboxPkgs = import ./composition.nix { inherit pkgs; };
mercury-parser = archiveboxPkgs."@postlight/mercury-parser";
readability-extractor = archiveboxPkgs."readability-extractor-git+https://github.com/ArchiveBox/readability-extractor.git";
single-file = archiveboxPkgs."single-file-git+https://github.com/gildas-lormeau/SingleFile.git";
in {
options.services.archivebox = {
enable = mkEnableOption "ArchiveBox"; # mkEnableOption prepends "Whether to enable"
dataDir = mkOption {
type = types.str;
default = "/var/lib/archivebox";
description = ''
Path to the archivebox data directory
'';
};
listenAddress = mkOption {
type = types.str;
default = "localhost";
example = "127.0.0.1";
description = ''
The address archivebox should listen on
'';
};
listenPort = mkOption {
type = types.int;
default = 37226;
example = 1357;
description = ''
The port archivebox should listen on
'';
};
user = mkOption {
type = types.str;
default = "archivebox";
description = ''
The user archivebox should run as
'';
};
group = mkOption {
type = types.str;
default = "archivebox";
description = ''
The group archivebox should run as
'';
};
timeout = mkOption {
type = types.int;
default = 60;
example = 120;
description = ''
Maximum allowed download time, in seconds, per archive method for each link
'';
};
snapshotsPerPage = mkOption {
type = types.int;
default = 40;
example = 100;
description = ''
Maximum number of Snapshots to show per page on Snapshot list pages
'';
};
footerInfo = mkOption {
type = types.nullOr types.str;
default = null;
example = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.";
description = ''
Some text to display in the footer of the archive index.
Useful for providing server admin contact info to respond to takedown requests.
'';
};
urlBlacklist = mkOption {
type = types.nullOr types.str;
default = null;
example = "\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$";
description = ''
A regular expression; URLs matching it are excluded from archiving.
'';
};
urlWhitelist = mkOption {
type = types.nullOr types.str;
default = null;
example = "^http(s)?:\\/\\/(.+)?example\\.com\\/?.*$";
description = ''
A regular expression; all URLs that do not match it are excluded from archiving.
'';
};
saveTitle = mkOption {
type = types.bool;
default = true;
description = ''
Save the title of the webpage
'';
};
saveFavicon = mkOption {
type = types.bool;
default = true;
description = ''
Save the favicon of the webpage
'';
};
saveWget = mkOption {
type = types.bool;
default = true;
description = ''
Save the webpage with wget
'';
};
saveWgetRequisites = mkOption {
type = types.bool;
default = true;
description = ''
Fetch images/CSS/JS with wget. (Highly recommended: otherwise you won't download many assets critical to rendering the page, like images, JS, and CSS.)
'';
};
wgetUserAgent = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
This is the user agent to use during wget archiving.
'';
};
wgetCookiesFile = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
Cookies file to pass to wget. To capture sites that require a user to be logged in,
you can specify a path to a netscape-format cookies.txt file for wget to use.
'';
};
saveWARC = mkOption {
type = types.bool;
default = true;
description = ''
Save a timestamped WARC archive of all the page requests and responses during the wget archive process.
'';
};
savePDF = mkOption {
type = types.bool;
default = true;
description = ''
Print page as PDF. (Uses chromium)
'';
};
saveScreenshot = mkOption {
type = types.bool;
default = true;
description = ''
Fetch a screenshot of the page. (Uses chromium)
'';
};
screenshotResolution = mkOption {
type = types.str;
default = "1440,2000";
example = "1024,768";
description = ''
Screenshot resolution in pixels, as width,height.
'';
};
saveDOM = mkOption {
type = types.bool;
default = true;
description = ''
Fetch a DOM dump of the page. (Uses chromium)
'';
};
saveHeaders = mkOption {
type = types.bool;
default = true;
description = ''
Save the webpage's response headers
'';
};
saveSingleFile = mkOption {
type = types.bool;
default = true;
description = ''
Fetch an HTML file with all assets embedded using Single File. (Uses chromium) https://github.com/gildas-lormeau/SingleFile
'';
};
saveReadability = mkOption {
type = types.bool;
default = true;
description = ''
Extract article text, summary, and byline using Mozilla's Readability library. https://github.com/mozilla/readability
Unlike the other methods, this does not download any additional files, so it's practically free from a disk usage perspective.
'';
};
saveMercury = mkOption {
type = types.bool;
default = true;
description = ''
Extract article text, summary, and byline using the Mercury library. https://github.com/postlight/mercury-parser
Unlike the other methods, this does not download any additional files, so it's practically free from a disk usage perspective.
'';
};
saveGit = mkOption {
type = types.bool;
default = true;
description = ''
Fetch any git repositories on the page.
'';
};
gitDomains = mkOption {
type = types.nullOr types.str;
default = null;
example = "git.example.com";
description = ''
Domains on which to attempt downloading git repositories using `git clone`
'';
};
saveMedia = mkOption {
type = types.bool;
default = true;
description = ''
Fetch all audio, video, annotations, and media metadata on the page using `yt-dlp`.
Warning: this can use a lot of storage very quickly.
'';
};
mediaTimeout = mkOption {
type = types.int;
default = 3600;
example = 120;
description = ''
Maximum allowed download time, in seconds, for fetching media
'';
};
mediaMaxSize = mkOption {
type = types.nullOr types.str;
default = null;
example = "750m";
description = ''
Maximum size of media to download
'';
};
saveArchiveDotOrg = mkOption {
type = types.bool;
default = true;
description = ''
Submit the page's URL to be archived on Archive.org (the Internet Archive).
'';
};
checkSSLCert = mkOption {
type = types.bool;
default = true;
description = ''
Whether to enforce HTTPS certificate and HSTS chain of trust when archiving sites.
Set this to false if you want to archive pages even if they have expired or invalid certificates.
Be aware that when this is disabled you cannot guarantee the content was not tampered with in transit (man-in-the-middle) while archiving.
'';
};
curlUserAgent = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
This is the user agent to use during curl archiving.
'';
};
chromiumUserAgent = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
This is the user agent to use during Chromium headless archiving.
'';
};
chromiumUserDataDir = mkOption {
type = types.nullOr types.str;
default = null;
description = ''
Path to a Chrome user profile directory.
'';
};
publicCreateSnapshots = mkOption {
type = types.bool;
default = false;
description = ''
Whether anonymous users can add URLs to be archived
'';
};
publicViewSnapshots = mkOption {
type = types.bool;
default = true;
description = ''
Whether anonymous users can view archived pages
'';
};
publicViewIndex = mkOption {
type = types.bool;
default = true;
description = ''
Whether anonymous users can view the archive index
'';
};
};
config = mkIf cfg.enable {
users.users.${cfg.user} =
if cfg.user == "archivebox" then {
isSystemUser = true;
group = cfg.group;
home = cfg.dataDir;
createHome = true;
}
else {};
users.groups.${cfg.group} = {};
systemd.services.archivebox = {
enable = true;
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig.ExecStart = "${pkgs.archivebox}/bin/archivebox server";
serviceConfig.PrivateTmp = "yes";
serviceConfig.User = cfg.user;
serviceConfig.Group = cfg.group;
environment = let
boolToStr = bool: if bool then "true" else "false";
useCurl = cfg.saveArchiveDotOrg || cfg.saveFavicon || cfg.saveHeaders || cfg.saveTitle;
useGit = cfg.saveGit;
useWget = cfg.saveWget;
useSinglefile = cfg.saveSingleFile;
useReadability = cfg.saveReadability;
useMercury = cfg.saveMercury;
useYtdlp = cfg.saveMedia;
useChromium = cfg.saveDOM || cfg.savePDF || cfg.saveScreenshot || cfg.saveSingleFile;
in {
SAVE_TITLE = boolToStr cfg.saveTitle;
SAVE_FAVICON = boolToStr cfg.saveFavicon;
SAVE_WGET = boolToStr cfg.saveWget;
SAVE_WGET_REQUISITES = boolToStr cfg.saveWgetRequisites;
SAVE_SINGLEFILE = boolToStr cfg.saveSingleFile;
SAVE_READABILITY = boolToStr cfg.saveReadability;
SAVE_MERCURY = boolToStr cfg.saveMercury;
SAVE_PDF = boolToStr cfg.savePDF;
SAVE_SCREENSHOT = boolToStr cfg.saveScreenshot;
SAVE_DOM = boolToStr cfg.saveDOM;
SAVE_HEADERS = boolToStr cfg.saveHeaders;
SAVE_WARC = boolToStr cfg.saveWARC;
SAVE_GIT = boolToStr cfg.saveGit;
SAVE_MEDIA = boolToStr cfg.saveMedia;
SAVE_ARCHIVE_DOT_ORG = boolToStr cfg.saveArchiveDotOrg;
TIMEOUT = toString cfg.timeout;
MEDIA_TIMEOUT = toString cfg.mediaTimeout;
URL_BLACKLIST = cfg.urlBlacklist;
URL_WHITELIST = cfg.urlWhitelist;
BIND_ADDR = "${cfg.listenAddress}:${toString cfg.listenPort}";
PUBLIC_INDEX = boolToStr cfg.publicViewIndex;
PUBLIC_SNAPSHOTS = boolToStr cfg.publicViewSnapshots;
PUBLIC_ADD_VIEW = boolToStr cfg.publicCreateSnapshots;
FOOTER_INFO = cfg.footerInfo;
SNAPSHOTS_PER_PAGE = toString cfg.snapshotsPerPage;
RESOLUTION = cfg.screenshotResolution;
GIT_DOMAINS = cfg.gitDomains;
CHECK_SSL_VALIDITY = boolToStr cfg.checkSSLCert;
MEDIA_MAX_SIZE = cfg.mediaMaxSize;
CURL_USER_AGENT = cfg.curlUserAgent;
WGET_USER_AGENT = cfg.wgetUserAgent;
CHROME_USER_AGENT = cfg.chromiumUserAgent;
COOKIES_FILE = cfg.wgetCookiesFile;
CHROME_USER_DATA_DIR = cfg.chromiumUserDataDir;
CURL_BINARY = if useCurl then "${pkgs.curl}/bin/curl" else null;
GIT_BINARY = if useGit then "${pkgs.git}/bin/git" else null;
WGET_BINARY = if useWget then "${pkgs.wget}/bin/wget" else null;
SINGLEFILE_BINARY = if useSinglefile then "${single-file}/bin/single-file" else null;
READABILITY_BINARY = if useReadability then "${readability-extractor}/bin/readability-extractor" else null;
MERCURY_BINARY = if useMercury then "${mercury-parser}/bin/mercury-parser" else null;
YOUTUBEDL_BINARY = if useYtdlp then "${pkgs.yt-dlp}/bin/yt-dlp" else null;
NODE_BINARY = "${pkgs.nodejs}/bin/node"; # the nixpkgs nodejs package installs bin/node, not bin/nodejs; is this really needed? Nix already includes nodejs inside packages where needed
RIPGREP_BINARY = "${pkgs.ripgrep}/bin/rg";
CHROME_BINARY = if useChromium then "${pkgs.chromium}/bin/chromium-browser" else null;
USE_CURL = boolToStr useCurl;
USE_WGET = boolToStr useWget;
USE_SINGLEFILE = boolToStr useSinglefile;
USE_READABILITY = boolToStr useReadability;
USE_MERCURY = boolToStr useMercury;
USE_GIT = boolToStr useGit;
USE_CHROME = boolToStr useChromium;
USE_YOUTUBEDL = boolToStr useYtdlp;
USE_RIPGREP = boolToStr true;
OUTPUT_DIR = cfg.dataDir;
};
preStart = ''
mkdir -p ${cfg.dataDir}
chown ${cfg.user}:${cfg.group} ${cfg.dataDir}
# initialize/migrate the data directory
cd ${cfg.dataDir}
${pkgs.archivebox}/bin/archivebox init
'';
};
};
}
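
A fuller configuration sketch using only options declared above; the values are illustrative, not module defaults (except where noted):

services.archivebox = {
  enable = true;
  listenAddress = "127.0.0.1";
  listenPort = 8000;              # module default is 37226
  saveMedia = true;
  mediaMaxSize = "750m";          # cap per-item media downloads
  publicCreateSnapshots = false;  # module default; anonymous users cannot add URLs
  footerInfo = "Personal archive; contact the admin for takedown requests.";
};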

node-env.nix (new file, 588 lines)

@@ -0,0 +1,588 @@
# This file originates from node2nix
{lib, stdenv, nodejs, python2, pkgs, libtool, runCommand, writeTextFile, writeShellScript}:
let
# Workaround to cope with utillinux in Nixpkgs 20.09 and util-linux in Nixpkgs master
utillinux = if pkgs ? utillinux then pkgs.utillinux else pkgs.util-linux;
python = if nodejs ? python then nodejs.python else python2;
# Create a tar wrapper that filters all the 'Ignoring unknown extended header keyword' noise
tarWrapper = runCommand "tarWrapper" {} ''
mkdir -p $out/bin
cat > $out/bin/tar <<EOF
#! ${stdenv.shell} -e
$(type -p tar) "\$@" --warning=no-unknown-keyword --delay-directory-restore
EOF
chmod +x $out/bin/tar
'';
# Function that generates a TGZ file from a NPM project
buildNodeSourceDist =
{ name, version, src, ... }:
stdenv.mkDerivation {
name = "node-tarball-${name}-${version}";
inherit src;
buildInputs = [ nodejs ];
buildPhase = ''
export HOME=$TMPDIR
tgzFile=$(npm pack | tail -n 1) # Hooks to the pack command will add output (https://docs.npmjs.com/misc/scripts)
'';
installPhase = ''
mkdir -p $out/tarballs
mv $tgzFile $out/tarballs
mkdir -p $out/nix-support
echo "file source-dist $out/tarballs/$tgzFile" >> $out/nix-support/hydra-build-products
'';
};
# Common shell logic
installPackage = writeShellScript "install-package" ''
installPackage() {
local packageName=$1 src=$2
local strippedName
local DIR=$PWD
cd $TMPDIR
unpackFile $src
# Make the base dir in which the target dependency resides first
mkdir -p "$(dirname "$DIR/$packageName")"
if [ -f "$src" ]
then
# Figure out what directory has been unpacked
packageDir="$(find . -maxdepth 1 -type d | tail -1)"
# Restore write permissions to make building work
find "$packageDir" -type d -exec chmod u+x {} \;
chmod -R u+w "$packageDir"
# Move the extracted tarball into the output folder
mv "$packageDir" "$DIR/$packageName"
elif [ -d "$src" ]
then
# Get a stripped name (without hash) of the source directory.
# On old nixpkgs it's already set internally.
if [ -z "$strippedName" ]
then
strippedName="$(stripHash $src)"
fi
# Restore write permissions to make building work
chmod -R u+w "$strippedName"
# Move the extracted directory into the output folder
mv "$strippedName" "$DIR/$packageName"
fi
# Change to the package directory to install dependencies
cd "$DIR/$packageName"
}
'';
# Bundle the dependencies of the package
#
# Only include dependencies if they don't exist. They may also be bundled in the package.
includeDependencies = {dependencies}:
lib.optionalString (dependencies != []) (
''
mkdir -p node_modules
cd node_modules
''
+ (lib.concatMapStrings (dependency:
''
if [ ! -e "${dependency.name}" ]; then
${composePackage dependency}
fi
''
) dependencies)
+ ''
cd ..
''
);
# Recursively composes the dependencies of a package
composePackage = { name, packageName, src, dependencies ? [], ... }@args:
builtins.addErrorContext "while evaluating node package '${packageName}'" ''
installPackage "${packageName}" "${src}"
${includeDependencies { inherit dependencies; }}
cd ..
${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
'';
pinpointDependencies = {dependencies, production}:
let
pinpointDependenciesFromPackageJSON = writeTextFile {
name = "pinpointDependencies.js";
text = ''
var fs = require('fs');
var path = require('path');
function resolveDependencyVersion(location, name) {
if(location == process.env['NIX_STORE']) {
return null;
} else {
var dependencyPackageJSON = path.join(location, "node_modules", name, "package.json");
if(fs.existsSync(dependencyPackageJSON)) {
var dependencyPackageObj = JSON.parse(fs.readFileSync(dependencyPackageJSON));
if(dependencyPackageObj.name == name) {
return dependencyPackageObj.version;
}
} else {
return resolveDependencyVersion(path.resolve(location, ".."), name);
}
}
}
function replaceDependencies(dependencies) {
if(typeof dependencies == "object" && dependencies !== null) {
for(var dependency in dependencies) {
var resolvedVersion = resolveDependencyVersion(process.cwd(), dependency);
if(resolvedVersion === null) {
process.stderr.write("WARNING: cannot pinpoint dependency: "+dependency+", context: "+process.cwd()+"\n");
} else {
dependencies[dependency] = resolvedVersion;
}
}
}
}
/* Read the package.json configuration */
var packageObj = JSON.parse(fs.readFileSync('./package.json'));
/* Pinpoint all dependencies */
replaceDependencies(packageObj.dependencies);
if(process.argv[2] == "development") {
replaceDependencies(packageObj.devDependencies);
}
replaceDependencies(packageObj.optionalDependencies);
/* Write the fixed package.json file */
fs.writeFileSync("package.json", JSON.stringify(packageObj, null, 2));
'';
};
in
''
node ${pinpointDependenciesFromPackageJSON} ${if production then "production" else "development"}
${lib.optionalString (dependencies != [])
''
if [ -d node_modules ]
then
cd node_modules
${lib.concatMapStrings (dependency: pinpointDependenciesOfPackage dependency) dependencies}
cd ..
fi
''}
'';
# Recursively traverses all dependencies of a package and pinpoints all
# dependencies in the package.json file to the versions that are actually
# being used.
pinpointDependenciesOfPackage = { packageName, dependencies ? [], production ? true, ... }@args:
''
if [ -d "${packageName}" ]
then
cd "${packageName}"
${pinpointDependencies { inherit dependencies production; }}
cd ..
${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
fi
'';
# Extract the Node.js source code which is used to compile packages with
# native bindings
nodeSources = runCommand "node-sources" {} ''
tar --no-same-owner --no-same-permissions -xf ${nodejs.src}
mv node-* $out
'';
# Script that adds _integrity fields to all package.json files to prevent NPM from consulting the cache (that is empty)
addIntegrityFieldsScript = writeTextFile {
name = "addintegrityfields.js";
text = ''
var fs = require('fs');
var path = require('path');
function augmentDependencies(baseDir, dependencies) {
for(var dependencyName in dependencies) {
var dependency = dependencies[dependencyName];
// Open package.json and augment metadata fields
var packageJSONDir = path.join(baseDir, "node_modules", dependencyName);
var packageJSONPath = path.join(packageJSONDir, "package.json");
if(fs.existsSync(packageJSONPath)) { // Only augment packages that exist. Sometimes we may have production installs in which development dependencies can be ignored
console.log("Adding metadata fields to: "+packageJSONPath);
var packageObj = JSON.parse(fs.readFileSync(packageJSONPath));
if(dependency.integrity) {
packageObj["_integrity"] = dependency.integrity;
} else {
packageObj["_integrity"] = "sha1-000000000000000000000000000="; // When no _integrity string has been provided (e.g. by Git dependencies), add a dummy one. It does not seem to harm and it bypasses downloads.
}
if(dependency.resolved) {
packageObj["_resolved"] = dependency.resolved; // Adopt the resolved property if one has been provided
} else {
packageObj["_resolved"] = dependency.version; // Set the resolved version to the version identifier. This prevents NPM from cloning Git repositories.
}
if(dependency.from !== undefined) { // Adopt from property if one has been provided
packageObj["_from"] = dependency.from;
}
fs.writeFileSync(packageJSONPath, JSON.stringify(packageObj, null, 2));
}
// Augment transitive dependencies
if(dependency.dependencies !== undefined) {
augmentDependencies(packageJSONDir, dependency.dependencies);
}
}
}
if(fs.existsSync("./package-lock.json")) {
var packageLock = JSON.parse(fs.readFileSync("./package-lock.json"));
if(![1, 2].includes(packageLock.lockfileVersion)) {
process.stderr.write("Sorry, I only understand lock file versions 1 and 2!\n");
process.exit(1);
}
if(packageLock.dependencies !== undefined) {
augmentDependencies(".", packageLock.dependencies);
}
}
'';
};
# Reconstructs a package-lock file from the node_modules/ folder structure and package.json files with dummy sha1 hashes
reconstructPackageLock = writeTextFile {
name = "addintegrityfields.js";
text = ''
var fs = require('fs');
var path = require('path');
var packageObj = JSON.parse(fs.readFileSync("package.json"));
var lockObj = {
name: packageObj.name,
version: packageObj.version,
lockfileVersion: 1,
requires: true,
dependencies: {}
};
function augmentPackageJSON(filePath, dependencies) {
var packageJSON = path.join(filePath, "package.json");
if(fs.existsSync(packageJSON)) {
var packageObj = JSON.parse(fs.readFileSync(packageJSON));
dependencies[packageObj.name] = {
version: packageObj.version,
integrity: "sha1-000000000000000000000000000=",
dependencies: {}
};
processDependencies(path.join(filePath, "node_modules"), dependencies[packageObj.name].dependencies);
}
}
function processDependencies(dir, dependencies) {
if(fs.existsSync(dir)) {
var files = fs.readdirSync(dir);
files.forEach(function(entry) {
var filePath = path.join(dir, entry);
var stats = fs.statSync(filePath);
if(stats.isDirectory()) {
if(entry.substr(0, 1) == "@") {
// When we encounter a namespace folder, augment all packages belonging to the scope
var pkgFiles = fs.readdirSync(filePath);
pkgFiles.forEach(function(entry) {
// Stat the scoped package entry itself; the outer 'stats' refers to the @scope folder
var pkgFilePath = path.join(filePath, entry);
if(fs.statSync(pkgFilePath).isDirectory()) {
augmentPackageJSON(pkgFilePath, dependencies);
}
});
} else {
augmentPackageJSON(filePath, dependencies);
}
}
});
}
}
processDependencies("node_modules", lockObj.dependencies);
fs.writeFileSync("package-lock.json", JSON.stringify(lockObj, null, 2));
'';
};
prepareAndInvokeNPM = {packageName, bypassCache, reconstructLock, npmFlags, production}:
let
forceOfflineFlag = if bypassCache then "--offline" else "--registry http://www.example.com";
in
''
# Pinpoint the versions of all dependencies to the ones that are actually being used
echo "pinpointing versions of dependencies..."
source $pinpointDependenciesScriptPath
# Patch the shebangs of the bundled modules to prevent them from
# calling executables outside the Nix store as much as possible
patchShebangs .
# Deploy the Node.js package by running npm install. Since the
# dependencies have been provided already by ourselves, it should not
# attempt to install them again, which is good, because we want to make
# it Nix's responsibility. If it needs to install any dependencies
# anyway (e.g. because the dependency parameters are
# incomplete/incorrect), it fails.
#
# The other responsibilities of NPM are kept -- version checks, build
# steps, postprocessing etc.
export HOME=$TMPDIR
cd "${packageName}"
runHook preRebuild
${lib.optionalString bypassCache ''
${lib.optionalString reconstructLock ''
if [ -f package-lock.json ]
then
echo "WARNING: Reconstruct lock option enabled, but a lock file already exists!"
echo "This will most likely result in version mismatches! We will remove the lock file and regenerate it!"
rm package-lock.json
else
echo "No package-lock.json file found, reconstructing..."
fi
node ${reconstructPackageLock}
''}
node ${addIntegrityFieldsScript}
''}
npm ${forceOfflineFlag} --nodedir=${nodeSources} ${npmFlags} ${lib.optionalString production "--production"} rebuild
if [ "''${dontNpmInstall-}" != "1" ]
then
# NPM tries to download packages even when they already exist if npm-shrinkwrap is used.
rm -f npm-shrinkwrap.json
npm ${forceOfflineFlag} --nodedir=${nodeSources} ${npmFlags} ${lib.optionalString production "--production"} install
fi
'';
# Builds and composes an NPM package including all its dependencies
buildNodePackage =
{ name
, packageName
, version
, dependencies ? []
, buildInputs ? []
, production ? true
, npmFlags ? ""
, dontNpmInstall ? false
, bypassCache ? false
, reconstructLock ? false
, preRebuild ? ""
, dontStrip ? true
, unpackPhase ? "true"
, buildPhase ? "true"
, meta ? {}
, ... }@args:
let
extraArgs = removeAttrs args [ "name" "dependencies" "buildInputs" "dontStrip" "dontNpmInstall" "preRebuild" "unpackPhase" "buildPhase" "meta" ];
in
stdenv.mkDerivation ({
name = "${name}-${version}";
buildInputs = [ tarWrapper python nodejs ]
++ lib.optional (stdenv.isLinux) utillinux
++ lib.optional (stdenv.isDarwin) libtool
++ buildInputs;
inherit nodejs;
inherit dontStrip; # Stripping may fail a build for some package deployments
inherit dontNpmInstall preRebuild unpackPhase buildPhase;
compositionScript = composePackage args;
pinpointDependenciesScript = pinpointDependenciesOfPackage args;
passAsFile = [ "compositionScript" "pinpointDependenciesScript" ];
installPhase = ''
source ${installPackage}
# Create and enter a root node_modules/ folder
mkdir -p $out/lib/node_modules
cd $out/lib/node_modules
# Compose the package and all its dependencies
source $compositionScriptPath
${prepareAndInvokeNPM { inherit packageName bypassCache reconstructLock npmFlags production; }}
# Create symlink to the deployed executable folder, if applicable
if [ -d "$out/lib/node_modules/.bin" ]
then
ln -s $out/lib/node_modules/.bin $out/bin
fi
# Create symlinks to the deployed manual page folders, if applicable
if [ -d "$out/lib/node_modules/${packageName}/man" ]
then
mkdir -p $out/share
for dir in "$out/lib/node_modules/${packageName}/man/"*
do
mkdir -p $out/share/man/$(basename "$dir")
for page in "$dir"/*
do
ln -s $page $out/share/man/$(basename "$dir")
done
done
fi
# Run post install hook, if provided
runHook postInstall
'';
meta = {
# default to Node.js' platforms
platforms = nodejs.meta.platforms;
} // meta;
} // extraArgs);
# Builds a node environment (a node_modules folder and a set of binaries)
buildNodeDependencies =
{ name
, packageName
, version
, src
, dependencies ? []
, buildInputs ? []
, production ? true
, npmFlags ? ""
, dontNpmInstall ? false
, bypassCache ? false
, reconstructLock ? false
, dontStrip ? true
, unpackPhase ? "true"
, buildPhase ? "true"
, ... }@args:
let
extraArgs = removeAttrs args [ "name" "dependencies" "buildInputs" ];
in
stdenv.mkDerivation ({
name = "node-dependencies-${name}-${version}";
buildInputs = [ tarWrapper python nodejs ]
++ lib.optional (stdenv.isLinux) utillinux
++ lib.optional (stdenv.isDarwin) libtool
++ buildInputs;
inherit dontStrip; # Stripping may fail a build for some package deployments
inherit dontNpmInstall unpackPhase buildPhase;
includeScript = includeDependencies { inherit dependencies; };
pinpointDependenciesScript = pinpointDependenciesOfPackage args;
passAsFile = [ "includeScript" "pinpointDependenciesScript" ];
installPhase = ''
source ${installPackage}
mkdir -p $out/${packageName}
cd $out/${packageName}
source $includeScriptPath
# Create fake package.json to make the npm commands work properly
cp ${src}/package.json .
chmod 644 package.json
${lib.optionalString bypassCache ''
if [ -f ${src}/package-lock.json ]
then
cp ${src}/package-lock.json .
fi
''}
# Go to the parent folder to make sure that all packages are pinpointed
cd ..
${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
${prepareAndInvokeNPM { inherit packageName bypassCache reconstructLock npmFlags production; }}
# Expose the executables that were installed
cd ..
${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
mv ${packageName} lib
ln -s $out/lib/node_modules/.bin $out/bin
'';
} // extraArgs);
# Builds a development shell
buildNodeShell =
{ name
, packageName
, version
, src
, dependencies ? []
, buildInputs ? []
, production ? true
, npmFlags ? ""
, dontNpmInstall ? false
, bypassCache ? false
, reconstructLock ? false
, dontStrip ? true
, unpackPhase ? "true"
, buildPhase ? "true"
, ... }@args:
let
nodeDependencies = buildNodeDependencies args;
in
stdenv.mkDerivation {
name = "node-shell-${name}-${version}";
buildInputs = [ python nodejs ] ++ lib.optional (stdenv.isLinux) utillinux ++ buildInputs;
buildCommand = ''
mkdir -p $out/bin
cat > $out/bin/shell <<EOF
#! ${stdenv.shell} -e
$shellHook
exec ${stdenv.shell}
EOF
chmod +x $out/bin/shell
'';
# Provide the dependencies in a development shell through the NODE_PATH environment variable
inherit nodeDependencies;
shellHook = lib.optionalString (dependencies != []) ''
export NODE_PATH=${nodeDependencies}/lib/node_modules
export PATH="${nodeDependencies}/bin:$PATH"
'';
};
in
{
buildNodeSourceDist = lib.makeOverridable buildNodeSourceDist;
buildNodePackage = lib.makeOverridable buildNodePackage;
buildNodeDependencies = lib.makeOverridable buildNodeDependencies;
buildNodeShell = lib.makeOverridable buildNodeShell;
}
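
As orientation for how these helpers are wired up (normally by the generated node-packages.nix via composition.nix): a minimal sketch of calling buildNodeShell by hand. The import arguments mirror composition.nix above; the package values are hypothetical:

let
  pkgs = import <nixpkgs> {};
  nodeEnv = import ./node-env.nix {
    inherit pkgs;
    inherit (pkgs) stdenv lib python2 runCommand writeTextFile writeShellScript;
    nodejs = pkgs."nodejs-12_x";
    libtool = if pkgs.stdenv.isDarwin then pkgs.darwin.cctools else null;
  };
in nodeEnv.buildNodeShell {
  name = "example";        # hypothetical package
  packageName = "example";
  version = "0.0.1";
  src = ./.;               # assumes a package.json in the current directory
  dependencies = [];
}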

node-packages.json (new file, 5 lines)

@@ -0,0 +1,5 @@
[
"@postlight/mercury-parser"
, { "readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git" }
, { "single-file": "git+https://github.com/gildas-lormeau/SingleFile.git" }
]
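
Note how the two entry styles map to generated attribute names: a bare string becomes an attribute named after the package, while a name-to-git-URL pair becomes an attribute suffixed with the full URL. A minimal sketch showing the resulting names, exactly as module.nix above selects them:

let
  pkgs = import <nixpkgs> {};
  archiveboxPkgs = import ./composition.nix { inherit pkgs; };
in {
  mercury-parser = archiveboxPkgs."@postlight/mercury-parser";
  readability-extractor = archiveboxPkgs."readability-extractor-git+https://github.com/ArchiveBox/readability-extractor.git";
  single-file = archiveboxPkgs."single-file-git+https://github.com/gildas-lormeau/SingleFile.git";
}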

node-packages.nix (new file, 3009 lines)

File diff suppressed because it is too large.