diff --git a/composition.nix b/composition.nix
new file mode 100644
index 0000000..53bdef1
--- /dev/null
+++ b/composition.nix
@@ -0,0 +1,17 @@
+# This file has been generated by node2nix 1.9.0. Do not edit!
+
+{pkgs ? import <nixpkgs> {
+    inherit system;
+  }, system ? builtins.currentSystem, nodejs ? pkgs."nodejs-12_x"}:
+
+let
+  nodeEnv = import ./node-env.nix {
+    inherit (pkgs) stdenv lib python2 runCommand writeTextFile writeShellScript;
+    inherit pkgs nodejs;
+    libtool = if pkgs.stdenv.isDarwin then pkgs.darwin.cctools else null;
+  };
+in
+import ./node-packages.nix {
+  inherit (pkgs) fetchurl nix-gitignore stdenv lib fetchgit;
+  inherit nodeEnv;
+}
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..77e8aa0
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,43 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "locked": {
+        "lastModified": 1648297722,
+        "narHash": "sha256-W+qlPsiZd8F3XkzXOzAoR+mpFqzm3ekQkJNa+PIh1BQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "0f8662f1319ad6abf89b3380dd2722369fc51ade",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1638239011,
+        "narHash": "sha256-AjhmbT4UBlJWqxY0ea8a6GU2C2HdKUREkG43oRr3TZg=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "a7ecde854aee5c4c7cd6177f54a99d2c1ff28a31",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "21.11",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..a10ccc2
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,32 @@
+{
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/21.11";
+
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs = { self, nixpkgs, flake-utils, ... }:
+    let
+      supportedSystems = with flake-utils.lib.system; [ x86_64-linux i686-linux aarch64-linux ];
+    in {
+      nixosModule = import ./module.nix;
+    } // flake-utils.lib.eachSystem supportedSystems (system:
+      {
+        checks.install =
+          with import (nixpkgs + "/nixos/lib/testing-python.nix") { inherit system; };
+          simpleTest {
+            machine = { config, pkgs, ... }: {
+              imports = [ self.nixosModule ];
+
+              virtualisation.memorySize = 256;
+
+              services.archivebox.enable = true;
+            };
+
+            testScript = ''
+              machine.wait_for_unit("archivebox")
+            '';
+          };
+      }
+    );
+}
\ No newline at end of file
diff --git a/generate.sh b/generate.sh
new file mode 100755
index 0000000..560dc5f
--- /dev/null
+++ b/generate.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+rm -f ./node-env.nix
+nix run nixpkgs#nodePackages.node2nix -- -i node-packages.json -o node-packages.nix -c composition.nix --no-out-link
\ No newline at end of file
diff --git a/module.nix b/module.nix
new file mode 100644
index 0000000..2f9ed67
--- /dev/null
+++ b/module.nix
@@ -0,0 +1,442 @@
+{ pkgs, lib, config, ... }:
+
+with lib;
+
+# TODO pocket integration (POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS)
+# TODO fix http timeout?
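+#
+# Example usage (illustrative values, not the defaults) from a NixOS
+# configuration that imports this module:
+#
+#   services.archivebox = {
+#     enable = true;
+#     listenAddress = "127.0.0.1";
+#     listenPort = 8000;
+#     saveMedia = false; # skip yt-dlp media downloads to save disk space
+#   };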
+
+let
+  cfg = config.services.archivebox;
+
+  archiveboxPkgs = import ./composition.nix { inherit pkgs; };
+  mercury-parser = archiveboxPkgs."@postlight/mercury-parser";
+  readability-extractor = archiveboxPkgs."readability-extractor-git+https://github.com/ArchiveBox/readability-extractor.git";
+  single-file = archiveboxPkgs."single-file-git+https://github.com/gildas-lormeau/SingleFile.git";
+in {
+  options.services.archivebox = {
+    enable = mkEnableOption "ArchiveBox";
+
+    dataDir = mkOption {
+      type = types.str;
+      default = "/var/lib/archivebox";
+      description = ''
+        Path to the ArchiveBox data directory.
+      '';
+    };
+
+    listenAddress = mkOption {
+      type = types.str;
+      default = "localhost";
+      example = "127.0.0.1";
+      description = ''
+        The address ArchiveBox should listen on.
+      '';
+    };
+
+    listenPort = mkOption {
+      type = types.int;
+      default = 37226;
+      example = 1357;
+      description = ''
+        The port ArchiveBox should listen on.
+      '';
+    };
+
+    user = mkOption {
+      type = types.str;
+      default = "archivebox";
+      description = ''
+        The user ArchiveBox should run as.
+      '';
+    };
+
+    group = mkOption {
+      type = types.str;
+      default = "archivebox";
+      description = ''
+        The group ArchiveBox should run as.
+      '';
+    };
+
+    timeout = mkOption {
+      type = types.int;
+      default = 60;
+      example = 120;
+      description = ''
+        Maximum allowed download time per archive method for each link, in seconds.
+      '';
+    };
+
+    snapshotsPerPage = mkOption {
+      type = types.int;
+      default = 40;
+      example = 100;
+      description = ''
+        Maximum number of Snapshots to show per page on Snapshot list pages.
+      '';
+    };
+
+    footerInfo = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      example = "Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.";
+      description = ''
+        Some text to display in the footer of the archive index.
+        Useful for providing server admin contact info to respond to takedown requests.
+      '';
+    };
+
+    urlBlacklist = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      example = "\\.(css|js|otf|ttf|woff|woff2|gstatic\\.com|googleapis\\.com/css)(\\?.*)?$";
+      description = ''
+        A regular expression used to exclude certain URLs from archiving.
+      '';
+    };
+
+    urlWhitelist = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      example = "^http(s)?:\\/\\/(.+)?example\\.com\\/?.*$";
+      description = ''
+        A regular expression used to exclude all URLs that don't match the given pattern from archiving.
+      '';
+    };
+
+    saveTitle = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Save the title of the webpage.
+      '';
+    };
+
+    saveFavicon = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Save the favicon of the webpage.
+      '';
+    };
+
+    saveWget = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Save the webpage with wget.
+      '';
+    };
+
+    saveWgetRequisites = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch images/CSS/JS with wget. (true is highly recommended; otherwise you won't download many critical assets needed to render the page, like images, JS, and CSS.)
+      '';
+    };
+
+    wgetUserAgent = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = ''
+        This is the user agent to use during wget archiving.
+      '';
+    };
+
+    wgetCookiesFile = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = ''
+        Cookies file to pass to wget.
+        To capture sites that require a user to be logged in,
+        you can specify a path to a netscape-format cookies.txt file for wget to use.
+      '';
+    };
+
+    saveWARC = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Save a timestamped WARC archive of all the page requests and responses during the wget archive process.
+      '';
+    };
+
+    savePDF = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Print the page as a PDF. (Uses Chromium)
+      '';
+    };
+
+    saveScreenshot = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch a screenshot of the page. (Uses Chromium)
+      '';
+    };
+
+    screenshotResolution = mkOption {
+      type = types.str;
+      default = "1440,2000";
+      example = "1024,768";
+      description = ''
+        Screenshot resolution in pixels, as width,height.
+      '';
+    };
+
+    saveDOM = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch a DOM dump of the page. (Uses Chromium)
+      '';
+    };
+
+    saveHeaders = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Save the webpage's response headers.
+      '';
+    };
+
+    saveSingleFile = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch an HTML file with all assets embedded using SingleFile. (Uses Chromium) https://github.com/gildas-lormeau/SingleFile
+      '';
+    };
+
+    saveReadability = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Extract article text, summary, and byline using Mozilla's Readability library. https://github.com/mozilla/readability
+        Unlike the other methods, this does not download any additional files, so it's practically free from a disk usage perspective.
+      '';
+    };
+
+    saveMercury = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Extract article text, summary, and byline using the Mercury library. https://github.com/postlight/mercury-parser
+        Unlike the other methods, this does not download any additional files, so it's practically free from a disk usage perspective.
+      '';
+    };
+
+    saveGit = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch any git repositories on the page.
+      '';
+    };
+
+    gitDomains = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      example = "git.example.com";
+      description = ''
+        Domains to attempt download of git repositories from, using `git clone`.
+      '';
+    };
+
+    saveMedia = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Fetch all audio, video, annotations, and media metadata on the page using `yt-dlp`.
+        Warning: this can use up a lot of storage very quickly.
+      '';
+    };
+
+    mediaTimeout = mkOption {
+      type = types.int;
+      default = 3600;
+      example = 120;
+      description = ''
+        Maximum allowed download time for fetching media, in seconds.
+      '';
+    };
+
+    mediaMaxSize = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      example = "750m";
+      description = ''
+        Maximum size of media to download.
+      '';
+    };
+
+    saveArchiveDotOrg = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Submit the page's URL to be archived on Archive.org (the Internet Archive).
+      '';
+    };
+
+    checkSSLCert = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Whether to enforce the HTTPS certificate and HSTS chain of trust when archiving sites.
+        Set this to false if you want to archive pages even if they have expired or invalid certificates.
+        Be aware that when false you cannot guarantee that you have not been man-in-the-middle'd while archiving content.
+      '';
+    };
+
+    curlUserAgent = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = ''
+        This is the user agent to use during curl archiving.
+      '';
+    };
+
+    chromiumUserAgent = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = ''
+        This is the user agent to use during Chromium headless archiving.
+      '';
+    };
+
+    chromiumUserDataDir = mkOption {
+      type = types.nullOr types.str;
+      default = null;
+      description = ''
+        Path to a Chrome user profile directory.
+      '';
+    };
+
+    publicCreateSnapshots = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Anonymous users can add URLs to be archived.
+      '';
+    };
+
+    publicViewSnapshots = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Anonymous users can view archived pages.
+      '';
+    };
+
+    publicViewIndex = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        Anonymous users can view the archive index.
+      '';
+    };
+  };
+
+  config = mkIf cfg.enable {
+    users.users.${cfg.user} =
+      if cfg.user == "archivebox" then {
+        isSystemUser = true;
+        group = cfg.group;
+        home = cfg.dataDir;
+        createHome = true;
+      }
+      else {};
+    users.groups.${cfg.group} = {};
+
+    systemd.services.archivebox = {
+      enable = true;
+      after = [ "network.target" ];
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig.ExecStart = "${pkgs.archivebox}/bin/archivebox server";
+      serviceConfig.PrivateTmp = "yes";
+      serviceConfig.User = cfg.user;
+      serviceConfig.Group = cfg.group;
+      environment = let
+        boolToStr = bool: if bool then "true" else "false";
+
+        useCurl = cfg.saveArchiveDotOrg || cfg.saveFavicon || cfg.saveHeaders || cfg.saveTitle;
+        useGit = cfg.saveGit;
+        useWget = cfg.saveWget;
+        useSinglefile = cfg.saveSingleFile;
+        useReadability = cfg.saveReadability;
+        useMercury = cfg.saveMercury;
+        useYtdlp = cfg.saveMedia;
+        useChromium = cfg.saveDOM || cfg.savePDF || cfg.saveScreenshot || cfg.saveSingleFile;
+      in {
+        SAVE_TITLE = boolToStr cfg.saveTitle;
+        SAVE_FAVICON = boolToStr cfg.saveFavicon;
+        SAVE_WGET = boolToStr cfg.saveWget;
+        SAVE_WGET_REQUISITES = boolToStr cfg.saveWgetRequisites;
+        SAVE_SINGLEFILE = boolToStr cfg.saveSingleFile;
+        SAVE_READABILITY = boolToStr cfg.saveReadability;
+        SAVE_MERCURY = boolToStr cfg.saveMercury;
+        SAVE_PDF = boolToStr cfg.savePDF;
+        SAVE_SCREENSHOT = boolToStr cfg.saveScreenshot;
+        SAVE_DOM = boolToStr cfg.saveDOM;
+        SAVE_HEADERS = boolToStr cfg.saveHeaders;
+        SAVE_WARC = boolToStr cfg.saveWARC;
+        SAVE_GIT = boolToStr cfg.saveGit;
+        SAVE_MEDIA = boolToStr cfg.saveMedia;
+        SAVE_ARCHIVE_DOT_ORG = boolToStr cfg.saveArchiveDotOrg;
+
+        TIMEOUT = toString cfg.timeout;
+        MEDIA_TIMEOUT = toString cfg.mediaTimeout;
+        URL_BLACKLIST = cfg.urlBlacklist;
+        URL_WHITELIST = cfg.urlWhitelist;
+
+        BIND_ADDR = "${cfg.listenAddress}:${toString cfg.listenPort}";
+        PUBLIC_INDEX = boolToStr cfg.publicViewIndex;
+        PUBLIC_SNAPSHOTS = boolToStr cfg.publicViewSnapshots;
+        PUBLIC_ADD_VIEW = boolToStr cfg.publicCreateSnapshots;
+        FOOTER_INFO = cfg.footerInfo;
+        SNAPSHOTS_PER_PAGE = toString cfg.snapshotsPerPage;
+
+        RESOLUTION = cfg.screenshotResolution;
+        GIT_DOMAINS = cfg.gitDomains;
+        CHECK_SSL_VALIDITY = boolToStr cfg.checkSSLCert;
+        MEDIA_MAX_SIZE = cfg.mediaMaxSize;
+        CURL_USER_AGENT = cfg.curlUserAgent;
+        WGET_USER_AGENT = cfg.wgetUserAgent;
+        CHROME_USER_AGENT = cfg.chromiumUserAgent;
+        COOKIES_FILE = cfg.wgetCookiesFile;
+        CHROME_USER_DATA_DIR = cfg.chromiumUserDataDir;
+
+        CURL_BINARY = if useCurl then "${pkgs.curl}/bin/curl" else null;
+        GIT_BINARY = if useGit then "${pkgs.git}/bin/git" else null;
+        WGET_BINARY = if useWget then "${pkgs.wget}/bin/wget" else null;
+        SINGLEFILE_BINARY = if useSinglefile then "${single-file}/bin/single-file" else null;
+        READABILITY_BINARY = if useReadability then "${readability-extractor}/bin/readability-extractor" else null;
+        MERCURY_BINARY = if useMercury then "${mercury-parser}/bin/mercury-parser" else null;
+        YOUTUBEDL_BINARY = if useYtdlp then "${pkgs.yt-dlp}/bin/yt-dlp" else null;
+        NODE_BINARY = "${pkgs.nodejs}/bin/node"; # is this really needed? Nix already includes nodejs inside packages where needed
+        RIPGREP_BINARY = "${pkgs.ripgrep}/bin/rg";
+        CHROME_BINARY = if useChromium then "${pkgs.chromium}/bin/chromium-browser" else null;
+
+        USE_CURL = boolToStr useCurl;
+        USE_WGET = boolToStr useWget;
+        USE_SINGLEFILE = boolToStr useSinglefile;
+        USE_READABILITY = boolToStr useReadability;
+        USE_MERCURY = boolToStr useMercury;
+        USE_GIT = boolToStr useGit;
+        USE_CHROME = boolToStr useChromium;
+        USE_YOUTUBEDL = boolToStr useYtdlp;
+        USE_RIPGREP = boolToStr true;
+
+        OUTPUT_DIR = cfg.dataDir;
+      };
+      preStart = ''
+        mkdir -p ${cfg.dataDir}
+        chown ${cfg.user}:${cfg.group} ${cfg.dataDir}
+        # initialize/migrate data directory
+        cd ${cfg.dataDir}
+        ${pkgs.archivebox}/bin/archivebox init
+      '';
+    };
+  };
+}
\ No newline at end of file
diff --git a/node-env.nix b/node-env.nix
new file mode 100644
index 0000000..5f05578
--- /dev/null
+++ b/node-env.nix
@@ -0,0 +1,588 @@
+# This file originates from node2nix
+
+{lib, stdenv, nodejs, python2, pkgs, libtool, runCommand, writeTextFile, writeShellScript}:
+
+let
+  # Workaround to cope with utillinux in Nixpkgs 20.09 and util-linux in Nixpkgs master
+  utillinux = if pkgs ? utillinux then pkgs.utillinux else pkgs.util-linux;
+
+  python = if nodejs ? python then nodejs.python else python2;
+
+  # Create a tar wrapper that filters all the 'Ignoring unknown extended header keyword' noise
+  tarWrapper = runCommand "tarWrapper" {} ''
+    mkdir -p $out/bin
+
+    cat > $out/bin/tar <<EOF
+    #! ${stdenv.shell} -e
+    $(type -p tar) "\$@" --warning=no-unknown-keyword --delay-directory-restore
+    EOF
+
+    chmod +x $out/bin/tar
+  '';
+
+  # Function that generates a TGZ file from a NPM project
+  buildNodeSourceDist =
+    { name, version, src, ... }:
+
+    stdenv.mkDerivation {
+      name = "node-tarball-${name}-${version}";
+      inherit src;
+      buildInputs = [ nodejs ];
+      buildPhase = ''
+        export HOME=$TMPDIR
+        tgzFile=$(npm pack | tail -n 1) # Hopefully this is the only part of the output
+      '';
+      installPhase = ''
+        mkdir -p $out/tarballs
+        mv $tgzFile $out/tarballs
+        mkdir -p $out/nix-support
+        echo "file source-dist \"$out/tarballs/$tgzFile\"" >> $out/nix-support/hydra-build-products
+      '';
+    };
+
+  # Common shell logic
+  installPackage = writeShellScript "install-package" ''
+    installPackage() {
+      local packageName=$1 src=$2
+
+      local strippedName
+
+      local DIR=$PWD
+      cd $TMPDIR
+
+      unpackFile $src
+
+      # Make the base dir in which the target dependency resides first
+      mkdir -p "$(dirname "$DIR/$packageName")"
+
+      if [ -f "$src" ]
+      then
+          # Figure out what directory has been unpacked
+          packageDir="$(find . -maxdepth 1 -type d | tail -1)"
+
+          # Restore write permissions to make building work
+          find "$packageDir" -type d -exec chmod u+x {} \;
+          chmod -R u+w "$packageDir"
+
+          # Move the extracted tarball into the output folder
+          mv "$packageDir" "$DIR/$packageName"
+      elif [ -d "$src" ]
+      then
+          # Get a stripped name (without hash) of the source directory.
+          # On old nixpkgs it's already set internally.
+          if [ -z "$strippedName" ]
+          then
+              strippedName="$(stripHash $src)"
+          fi
+
+          # Restore write permissions to make building work
+          chmod -R u+w "$strippedName"
+
+          # Move the extracted directory into the output folder
+          mv "$strippedName" "$DIR/$packageName"
+      fi
+
+      # Change to the package directory to install dependencies
+      cd "$DIR/$packageName"
+    }
+  '';
+
+  # Bundle the dependencies of the package
+  #
+  # Only include dependencies if they don't exist. They may also be bundled in the package.
+  includeDependencies = {dependencies}:
+    lib.optionalString (dependencies != []) (
+      ''
+        mkdir -p node_modules
+        cd node_modules
+      ''
+      + (lib.concatMapStrings (dependency:
+        ''
+          if [ ! -e "${dependency.name}" ]; then
+              ${composePackage dependency}
+          fi
+        ''
+      ) dependencies)
+      + ''
+        cd ..
+      ''
+    );
+
+  # Recursively composes the dependencies of a package
+  composePackage = { name, packageName, src, dependencies ? [], ... }@args:
+    builtins.addErrorContext "while evaluating node package '${packageName}'" ''
+      installPackage "${packageName}" "${src}"
+      ${includeDependencies { inherit dependencies; }}
+      cd ..
+      ${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
+    '';
+
+  pinpointDependencies = {dependencies, production}:
+    let
+      pinpointDependenciesFromPackageJSON = writeTextFile {
+        name = "pinpointDependencies.js";
+        text = ''
+          var fs = require('fs');
+          var path = require('path');
+
+          function resolveDependencyVersion(location, name) {
+              if(location == process.env['NIX_STORE']) {
+                  return null;
+              } else {
+                  var dependencyPackageJSON = path.join(location, "node_modules", name, "package.json");
+
+                  if(fs.existsSync(dependencyPackageJSON)) {
+                      var dependencyPackageObj = JSON.parse(fs.readFileSync(dependencyPackageJSON));
+
+                      if(dependencyPackageObj.name == name) {
+                          return dependencyPackageObj.version;
+                      }
+                  } else {
+                      return resolveDependencyVersion(path.resolve(location, ".."), name);
+                  }
+              }
+          }
+
+          function replaceDependencies(dependencies) {
+              if(typeof dependencies == "object" && dependencies !== null) {
+                  for(var dependency in dependencies) {
+                      var resolvedVersion = resolveDependencyVersion(process.cwd(), dependency);
+
+                      if(resolvedVersion === null) {
+                          process.stderr.write("WARNING: cannot pinpoint dependency: "+dependency+", context: "+process.cwd()+"\n");
+                      } else {
+                          dependencies[dependency] = resolvedVersion;
+                      }
+                  }
+              }
+          }
+
+          /* Read the package.json configuration */
+          var packageObj = JSON.parse(fs.readFileSync('./package.json'));
+
+          /* Pinpoint all dependencies */
+          replaceDependencies(packageObj.dependencies);
+          if(process.argv[2] == "development") {
+              replaceDependencies(packageObj.devDependencies);
+          }
+          replaceDependencies(packageObj.optionalDependencies);
+
+          /* Write the fixed package.json file */
+          fs.writeFileSync("package.json", JSON.stringify(packageObj, null, 2));
+        '';
+      };
+    in
+    ''
+      node ${pinpointDependenciesFromPackageJSON} ${if production then "production" else "development"}
+
+      ${lib.optionalString (dependencies != [])
+        ''
+          if [ -d node_modules ]
+          then
+              cd node_modules
+              ${lib.concatMapStrings (dependency: pinpointDependenciesOfPackage dependency) dependencies}
+              cd ..
+          fi
+        ''}
+    '';
+
+  # Recursively traverses all dependencies of a package and pinpoints all
+  # dependencies in the package.json file to the versions that are actually
+  # being used.
+
+  pinpointDependenciesOfPackage = { packageName, dependencies ? [], production ? true, ... }@args:
+    ''
+      if [ -d "${packageName}" ]
+      then
+          cd "${packageName}"
+          ${pinpointDependencies { inherit dependencies production; }}
+          cd ..
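+          # Scoped packages ("@scope/name") are unpacked one directory deeper,
+          # so step up one extra level for them: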
+          ${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
+      fi
+    '';
+
+  # Extract the Node.js source code which is used to compile packages with
+  # native bindings
+  nodeSources = runCommand "node-sources" {} ''
+    tar --no-same-owner --no-same-permissions -xf ${nodejs.src}
+    mv node-* $out
+  '';
+
+  # Script that adds _integrity fields to all package.json files to prevent NPM from consulting the cache (that is empty)
+  addIntegrityFieldsScript = writeTextFile {
+    name = "addintegrityfields.js";
+    text = ''
+      var fs = require('fs');
+      var path = require('path');
+
+      function augmentDependencies(baseDir, dependencies) {
+          for(var dependencyName in dependencies) {
+              var dependency = dependencies[dependencyName];
+
+              // Open package.json and augment metadata fields
+              var packageJSONDir = path.join(baseDir, "node_modules", dependencyName);
+              var packageJSONPath = path.join(packageJSONDir, "package.json");
+
+              if(fs.existsSync(packageJSONPath)) { // Only augment packages that exist. Sometimes we may have production installs in which development dependencies can be ignored
+                  console.log("Adding metadata fields to: "+packageJSONPath);
+                  var packageObj = JSON.parse(fs.readFileSync(packageJSONPath));
+
+                  if(dependency.integrity) {
+                      packageObj["_integrity"] = dependency.integrity;
+                  } else {
+                      packageObj["_integrity"] = "sha1-000000000000000000000000000="; // When no _integrity string has been provided (e.g. by Git dependencies), add a dummy one. It does not seem to harm and it bypasses downloads.
+                  }
+
+                  if(dependency.resolved) {
+                      packageObj["_resolved"] = dependency.resolved; // Adopt the resolved property if one has been provided
+                  } else {
+                      packageObj["_resolved"] = dependency.version; // Set the resolved version to the version identifier. This prevents NPM from cloning Git repositories.
+ } + + if(dependency.from !== undefined) { // Adopt from property if one has been provided + packageObj["_from"] = dependency.from; + } + + fs.writeFileSync(packageJSONPath, JSON.stringify(packageObj, null, 2)); + } + + // Augment transitive dependencies + if(dependency.dependencies !== undefined) { + augmentDependencies(packageJSONDir, dependency.dependencies); + } + } + } + + if(fs.existsSync("./package-lock.json")) { + var packageLock = JSON.parse(fs.readFileSync("./package-lock.json")); + + if(![1, 2].includes(packageLock.lockfileVersion)) { + process.stderr.write("Sorry, I only understand lock file versions 1 and 2!\n"); + process.exit(1); + } + + if(packageLock.dependencies !== undefined) { + augmentDependencies(".", packageLock.dependencies); + } + } + ''; + }; + + # Reconstructs a package-lock file from the node_modules/ folder structure and package.json files with dummy sha1 hashes + reconstructPackageLock = writeTextFile { + name = "addintegrityfields.js"; + text = '' + var fs = require('fs'); + var path = require('path'); + + var packageObj = JSON.parse(fs.readFileSync("package.json")); + + var lockObj = { + name: packageObj.name, + version: packageObj.version, + lockfileVersion: 1, + requires: true, + dependencies: {} + }; + + function augmentPackageJSON(filePath, dependencies) { + var packageJSON = path.join(filePath, "package.json"); + if(fs.existsSync(packageJSON)) { + var packageObj = JSON.parse(fs.readFileSync(packageJSON)); + dependencies[packageObj.name] = { + version: packageObj.version, + integrity: "sha1-000000000000000000000000000=", + dependencies: {} + }; + processDependencies(path.join(filePath, "node_modules"), dependencies[packageObj.name].dependencies); + } + } + + function processDependencies(dir, dependencies) { + if(fs.existsSync(dir)) { + var files = fs.readdirSync(dir); + + files.forEach(function(entry) { + var filePath = path.join(dir, entry); + var stats = fs.statSync(filePath); + + if(stats.isDirectory()) { + if(entry.substr(0, 1) == "@") { + // When we encounter a namespace folder, augment all packages belonging to the scope + var pkgFiles = fs.readdirSync(filePath); + + pkgFiles.forEach(function(entry) { + if(stats.isDirectory()) { + var pkgFilePath = path.join(filePath, entry); + augmentPackageJSON(pkgFilePath, dependencies); + } + }); + } else { + augmentPackageJSON(filePath, dependencies); + } + } + }); + } + } + + processDependencies("node_modules", lockObj.dependencies); + + fs.writeFileSync("package-lock.json", JSON.stringify(lockObj, null, 2)); + ''; + }; + + prepareAndInvokeNPM = {packageName, bypassCache, reconstructLock, npmFlags, production}: + let + forceOfflineFlag = if bypassCache then "--offline" else "--registry http://www.example.com"; + in + '' + # Pinpoint the versions of all dependencies to the ones that are actually being used + echo "pinpointing versions of dependencies..." + source $pinpointDependenciesScriptPath + + # Patch the shebangs of the bundled modules to prevent them from + # calling executables outside the Nix store as much as possible + patchShebangs . + + # Deploy the Node.js package by running npm install. Since the + # dependencies have been provided already by ourselves, it should not + # attempt to install them again, which is good, because we want to make + # it Nix's responsibility. If it needs to install any dependencies + # anyway (e.g. because the dependency parameters are + # incomplete/incorrect), it fails. 
+ # + # The other responsibilities of NPM are kept -- version checks, build + # steps, postprocessing etc. + + export HOME=$TMPDIR + cd "${packageName}" + runHook preRebuild + + ${lib.optionalString bypassCache '' + ${lib.optionalString reconstructLock '' + if [ -f package-lock.json ] + then + echo "WARNING: Reconstruct lock option enabled, but a lock file already exists!" + echo "This will most likely result in version mismatches! We will remove the lock file and regenerate it!" + rm package-lock.json + else + echo "No package-lock.json file found, reconstructing..." + fi + + node ${reconstructPackageLock} + ''} + + node ${addIntegrityFieldsScript} + ''} + + npm ${forceOfflineFlag} --nodedir=${nodeSources} ${npmFlags} ${lib.optionalString production "--production"} rebuild + + if [ "''${dontNpmInstall-}" != "1" ] + then + # NPM tries to download packages even when they already exist if npm-shrinkwrap is used. + rm -f npm-shrinkwrap.json + + npm ${forceOfflineFlag} --nodedir=${nodeSources} ${npmFlags} ${lib.optionalString production "--production"} install + fi + ''; + + # Builds and composes an NPM package including all its dependencies + buildNodePackage = + { name + , packageName + , version + , dependencies ? [] + , buildInputs ? [] + , production ? true + , npmFlags ? "" + , dontNpmInstall ? false + , bypassCache ? false + , reconstructLock ? false + , preRebuild ? "" + , dontStrip ? true + , unpackPhase ? "true" + , buildPhase ? "true" + , meta ? {} + , ... }@args: + + let + extraArgs = removeAttrs args [ "name" "dependencies" "buildInputs" "dontStrip" "dontNpmInstall" "preRebuild" "unpackPhase" "buildPhase" "meta" ]; + in + stdenv.mkDerivation ({ + name = "${name}-${version}"; + buildInputs = [ tarWrapper python nodejs ] + ++ lib.optional (stdenv.isLinux) utillinux + ++ lib.optional (stdenv.isDarwin) libtool + ++ buildInputs; + + inherit nodejs; + + inherit dontStrip; # Stripping may fail a build for some package deployments + inherit dontNpmInstall preRebuild unpackPhase buildPhase; + + compositionScript = composePackage args; + pinpointDependenciesScript = pinpointDependenciesOfPackage args; + + passAsFile = [ "compositionScript" "pinpointDependenciesScript" ]; + + installPhase = '' + source ${installPackage} + + # Create and enter a root node_modules/ folder + mkdir -p $out/lib/node_modules + cd $out/lib/node_modules + + # Compose the package and all its dependencies + source $compositionScriptPath + + ${prepareAndInvokeNPM { inherit packageName bypassCache reconstructLock npmFlags production; }} + + # Create symlink to the deployed executable folder, if applicable + if [ -d "$out/lib/node_modules/.bin" ] + then + ln -s $out/lib/node_modules/.bin $out/bin + fi + + # Create symlinks to the deployed manual page folders, if applicable + if [ -d "$out/lib/node_modules/${packageName}/man" ] + then + mkdir -p $out/share + for dir in "$out/lib/node_modules/${packageName}/man/"* + do + mkdir -p $out/share/man/$(basename "$dir") + for page in "$dir"/* + do + ln -s $page $out/share/man/$(basename "$dir") + done + done + fi + + # Run post install hook, if provided + runHook postInstall + ''; + + meta = { + # default to Node.js' platforms + platforms = nodejs.meta.platforms; + } // meta; + } // extraArgs); + + # Builds a node environment (a node_modules folder and a set of binaries) + buildNodeDependencies = + { name + , packageName + , version + , src + , dependencies ? [] + , buildInputs ? [] + , production ? true + , npmFlags ? "" + , dontNpmInstall ? false + , bypassCache ? 
+    , reconstructLock ? false
+    , dontStrip ? true
+    , unpackPhase ? "true"
+    , buildPhase ? "true"
+    , ... }@args:
+
+    let
+      extraArgs = removeAttrs args [ "name" "dependencies" "buildInputs" ];
+    in
+    stdenv.mkDerivation ({
+      name = "node-dependencies-${name}-${version}";
+
+      buildInputs = [ tarWrapper python nodejs ]
+        ++ lib.optional (stdenv.isLinux) utillinux
+        ++ lib.optional (stdenv.isDarwin) libtool
+        ++ buildInputs;
+
+      inherit dontStrip; # Stripping may fail a build for some package deployments
+      inherit dontNpmInstall unpackPhase buildPhase;
+
+      includeScript = includeDependencies { inherit dependencies; };
+      pinpointDependenciesScript = pinpointDependenciesOfPackage args;
+
+      passAsFile = [ "includeScript" "pinpointDependenciesScript" ];
+
+      installPhase = ''
+        source ${installPackage}
+
+        mkdir -p $out/${packageName}
+        cd $out/${packageName}
+
+        source $includeScriptPath
+
+        # Create fake package.json to make the npm commands work properly
+        cp ${src}/package.json .
+        chmod 644 package.json
+        ${lib.optionalString bypassCache ''
+          if [ -f ${src}/package-lock.json ]
+          then
+              cp ${src}/package-lock.json .
+          fi
+        ''}
+
+        # Go to the parent folder to make sure that all packages are pinpointed
+        cd ..
+        ${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
+
+        ${prepareAndInvokeNPM { inherit packageName bypassCache reconstructLock npmFlags production; }}
+
+        # Expose the executables that were installed
+        cd ..
+        ${lib.optionalString (builtins.substring 0 1 packageName == "@") "cd .."}
+
+        mv ${packageName} lib
+        ln -s $out/lib/node_modules/.bin $out/bin
+      '';
+    } // extraArgs);
+
+  # Builds a development shell
+  buildNodeShell =
+    { name
+    , packageName
+    , version
+    , src
+    , dependencies ? []
+    , buildInputs ? []
+    , production ? true
+    , npmFlags ? ""
+    , dontNpmInstall ? false
+    , bypassCache ? false
+    , reconstructLock ? false
+    , dontStrip ? true
+    , unpackPhase ? "true"
+    , buildPhase ? "true"
+    , ... }@args:
+
+    let
+      nodeDependencies = buildNodeDependencies args;
+    in
+    stdenv.mkDerivation {
+      name = "node-shell-${name}-${version}";
+
+      buildInputs = [ python nodejs ] ++ lib.optional (stdenv.isLinux) utillinux ++ buildInputs;
+      buildCommand = ''
+        mkdir -p $out/bin
+        cat > $out/bin/shell <<EOF
+        #! ${stdenv.shell} -e
+        $shellHook
+        exec ${pkgs.bashInteractive}/bin/bash
+        EOF
+        chmod +x $out/bin/shell
+      '';
+
+      # Provide the dependencies in a development shell through the NODE_PATH environment variable
+      inherit nodeDependencies;
+      shellHook = lib.optionalString (dependencies != []) ''
+        export NODE_PATH=${nodeDependencies}/lib/node_modules
+        export PATH="${nodeDependencies}/bin:$PATH"
+      '';
+    };
+in
+{
+  buildNodeSourceDist = lib.makeOverridable buildNodeSourceDist;
+  buildNodePackage = lib.makeOverridable buildNodePackage;
+  buildNodeDependencies = lib.makeOverridable buildNodeDependencies;
+  buildNodeShell = lib.makeOverridable buildNodeShell;
+}
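
Usage sketch (not part of the patch; the flake input name and URL below are placeholders, not something this repository defines): a downstream flake can import the NixOS module exported by flake.nix roughly like this:

  {
    inputs.nixpkgs.url = "github:NixOS/nixpkgs/21.11";
    inputs.archivebox.url = "github:example/archivebox-flake"; # placeholder URL

    outputs = { self, nixpkgs, archivebox, ... }: {
      nixosConfigurations.example = nixpkgs.lib.nixosSystem {
        system = "x86_64-linux";
        modules = [
          archivebox.nixosModule
          { services.archivebox.enable = true; }
        ];
      };
    };
  }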