From c011faab188fae1c84e45a8553d4eb479be7f121 Mon Sep 17 00:00:00 2001 From: Zuckerberg Date: Sun, 10 Aug 2025 15:17:27 -0700 Subject: [PATCH] Use flaresolverr with linkwarden --- machines/storage/s0/default.nix | 13 +++ patches/linkwarden-flaresolverr.patch | 144 ++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 patches/linkwarden-flaresolverr.patch diff --git a/machines/storage/s0/default.nix b/machines/storage/s0/default.nix index 519b3dd..6cf3ab4 100644 --- a/machines/storage/s0/default.nix +++ b/machines/storage/s0/default.nix @@ -320,7 +320,15 @@ enableRegistration = true; port = 41709; environment.NEXTAUTH_URL = "https://linkwarden.s0.neet.dev/api/v1/auth"; + environment.FLARESOLVERR_URL = "http://localhost:${toString config.services.flaresolverr.port}/v1"; environmentFile = "/run/agenix/linkwarden-environment"; + package = pkgs.linkwarden.overrideAttrs (oldAttrs: { + # Add patch that adds support for flaresolverr + patches = oldAttrs.patches or [ ] ++ [ + # https://github.com/linkwarden/linkwarden/pull/1251 + ../../../patches/linkwarden-flaresolverr.patch + ]; + }); }; age.secrets.linkwarden-environment.file = ../../../secrets/linkwarden-environment.age; services.meilisearch = { @@ -328,5 +336,10 @@ package = pkgs.meilisearch; }; + services.flaresolverr = { + enable = true; + port = 48072; + }; + boot.binfmt.emulatedSystems = [ "aarch64-linux" "armv7l-linux" ]; } diff --git a/patches/linkwarden-flaresolverr.patch b/patches/linkwarden-flaresolverr.patch new file mode 100644 index 0000000..b4ec84f --- /dev/null +++ b/patches/linkwarden-flaresolverr.patch @@ -0,0 +1,144 @@ +commit 3dac9f081f267e4a528decbd9d50e1f45ea7c2ba +Author: SteveImmanuel +Date: Fri Jun 27 13:07:38 2025 +0900 + + Add flaresolverr support into linkwarden + +diff --git a/.env.sample b/.env.sample +index bd3abcb0..0ca96d92 100644 +--- a/.env.sample ++++ b/.env.sample +@@ -43,6 +43,7 @@ TEXT_CONTENT_LIMIT= + SEARCH_FILTER_LIMIT= + INDEX_TAKE_COUNT= + MEILI_TIMEOUT= ++FLARESOLVERR_URL= + + # AI Settings + NEXT_PUBLIC_OLLAMA_ENDPOINT_URL= +diff --git a/apps/worker/lib/archiveHandler.ts b/apps/worker/lib/archiveHandler.ts +index 8ae19e2c..6c8656b2 100644 +--- a/apps/worker/lib/archiveHandler.ts ++++ b/apps/worker/lib/archiveHandler.ts +@@ -6,6 +6,7 @@ import { + chromium, + devices, + } from "playwright"; ++import axios from 'axios'; + import { prisma } from "@linkwarden/prisma"; + import sendToWayback from "./preservationScheme/sendToWayback"; + import { AiTaggingMethod } from "@linkwarden/prisma/client"; +@@ -75,6 +76,22 @@ export default async function archiveHandler( + }); + + const { browser, context } = await getBrowser(); ++ ++ const captchaSolve = await solveCaptcha(link.url); ++ ++ if (captchaSolve.status === 'error') { ++ console.error('Error solving captcha'); ++ } else if (captchaSolve.status === 'fail') { ++ console.warn('Failed solving captcha'); ++ } else if (captchaSolve.status === 'skip') { ++ console.info('Skip solving captcha'); ++ } else { ++ if (captchaSolve.solution) { ++ console.info('Solving captcha'); ++ await context.addCookies(captchaSolve.solution.cookies); ++ } ++ } ++ + const page = await context.newPage(); + + createFolder({ filePath: `archives/preview/${link.collectionId}` }); +@@ -105,6 +122,7 @@ export default async function archiveHandler( + aiTag: user.aiTaggingMethod !== AiTaggingMethod.DISABLED, + }; + ++ let newLinkName = ''; + try { + await Promise.race([ + (async () => { +@@ -127,6 +145,7 @@ export default async function archiveHandler( + // archive url + + await page.goto(link.url, { waitUntil: "domcontentloaded" }); ++ newLinkName = await page.title(); + + const metaDescription = await page.evaluate(() => { + const description = document.querySelector( +@@ -186,10 +205,16 @@ export default async function archiveHandler( + where: { id: link.id }, + }); + +- if (finalLink) ++ if (finalLink) { ++ // Replace the captcha-blocked link name if it has not been updated by user, else keep the same name ++ if (newLinkName === '' || finalLink.name === newLinkName || finalLink.name !== 'Just a moment...') { ++ newLinkName = finalLink.name; ++ } ++ + await prisma.link.update({ + where: { id: link.id }, + data: { ++ name: newLinkName, + lastPreserved: new Date().toISOString(), + readable: !finalLink.readable ? "unavailable" : undefined, + image: !finalLink.image ? "unavailable" : undefined, +@@ -203,6 +228,7 @@ export default async function archiveHandler( + : undefined, + }, + }); ++ } + else { + await removeFiles(link.id, link.collectionId); + } +@@ -271,6 +297,48 @@ export function getBrowserOptions(): LaunchOptions { + return browserOptions; + } + ++async function solveCaptcha(url: string, maxTimeout: number = 60000): Promise<{ ++ status: string, ++ solution?: { ++ cookies: { ++ name: string, ++ value: string, ++ domain: string, ++ path: string, ++ secure: boolean, ++ expires?: number, ++ httpOnly?: boolean, ++ sameSite?: "Strict" | "Lax" | "None" ++ }[], ++ } ++}> { ++ if (process.env.FLARESOLVERR_URL) { ++ try { ++ const response = await axios.post(process.env.FLARESOLVERR_URL, ++ { ++ cmd: 'request.get', ++ url, ++ maxTimeout ++ }, ++ { ++ headers: { 'Content-Type': 'application/json' } ++ } ++ ) ++ ++ if (response.status !== 200) { ++ return { status: 'fail' }; ++ } ++ ++ return { status: response.data.status, solution: response.data.solution }; ++ } catch (error) { ++ console.error('Error during captcha solving:', error); ++ return { status: 'error' }; ++ } ++ } ++ ++ return { status: 'skip' }; ++} ++ + async function getBrowser(): Promise<{ + browser: Browser; + context: BrowserContext;