From 87c49f497607639e84e7f993cdd3778359f55d52 Mon Sep 17 00:00:00 2001
From: wl
Date: Tue, 15 Apr 2025 21:16:54 -0400
Subject: [PATCH] new blog post about Anubis

Signed-off-by: wl
---
 _zola/content/2025-04-16-anubis-is-a-joke.md | 31 +++++++++
 _zola/templates/404.html | 10 +++
 .../{index.xhtml => index.html} | 0
 .../{index.xhtml => index.html} | 0
 .../{index.xhtml => index.html} | 0
 .../{index.xhtml => index.html} | 0
 blog/2025-04-16-anubis-is-a-joke/index.html | 66 +++++++++++++++++++
 blog/404.html | 40 +++++++++++
 blog/404.xhtml | 3 -
 blog/atom.xml | 42 +++++++++++-
 blog/{index.xhtml => index.html} | 8 +++
 blog/rss.xml | 31 ++++++++-
 blog/sitemap.xml | 4 ++
 13 files changed, 230 insertions(+), 5 deletions(-)
 create mode 100644 _zola/content/2025-04-16-anubis-is-a-joke.md
 create mode 100644 _zola/templates/404.html
 rename blog/2025-01-24-new-blog/{index.xhtml => index.html} (100%)
 rename blog/2025-02-04-servers-up/{index.xhtml => index.html} (100%)
 rename blog/2025-03-24-the-internet-sucks/{index.xhtml => index.html} (100%)
 rename blog/2025-04-13-xhtml-is-good-actually/{index.xhtml => index.html} (100%)
 create mode 100644 blog/2025-04-16-anubis-is-a-joke/index.html
 create mode 100644 blog/404.html
 delete mode 100644 blog/404.xhtml
 rename blog/{index.xhtml => index.html} (86%)

diff --git a/_zola/content/2025-04-16-anubis-is-a-joke.md b/_zola/content/2025-04-16-anubis-is-a-joke.md
new file mode 100644
index 0000000..86a0161
--- /dev/null
+++ b/_zola/content/2025-04-16-anubis-is-a-joke.md
@@ -0,0 +1,31 @@
++++
+title = "Anubis is a joke"
+date = 2025-04-16
+description = "an easily bypassable one, and not actually protecting your site (against anything other than really low effort scrapes)"
++++
+
+Over the past few months, a lot of people have turned to Anubis by Xe Iaso to try to protect their
+sites, primarily Git forges and alternative frontends, against AI scraping.
+
+Anubis is a new PoW captcha "solution" that (allegedly) keeps out scrapers by slowing down your
+browsing and forcing you to enable JavaScript to pass a challenge before you can view the site.
+Once it's wasted a few seconds of your time and made you reevaluate the worth of whatever you were
+visiting, the stupid anime girl (previously AI generated) it shows you gives a smile and you're on
+your way. This challenge will only work on Chromium and its Google-funded controlled opposition,
+Firefox. Basilisk does seem to work, though with broken CSS. It doesn't even work on Safari
+(allegedly, I don't own an iToy to test this with), and no other browser works with it (until you
+read the next section).
+
+There's one small problem with Anubis though. By default (and no installation I've checked changes
+this), Anubis will, at the time of writing, only present a challenge to User-Agents containing
+"Mozilla" and some obvious scraper agents. You can check this in /data/botPolicies.json.
+
+This means that all one of those evil scrapers Anubis is supposed to protect against has to do to
+bypass it is not use one of those User-Agents. It also means that you, too, can completely bypass
+it, since I know it's been annoying a lot of people lately. You can curl a site running the default
+config (which is most of them) and it won't serve an Anubis challenge; it'll just show you the site
+in its original form. No special options, no custom User-Agent, just curl http://domain.name and
+it'll let you through.
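+
+If you want to see it for yourself, here's roughly what that looks like. This is just a sketch:
+domain.name is the placeholder above, swap in whichever Anubis-fronted site has been wasting your
+time, and the keyword list is whatever its botPolicies.json says.
+
+```sh
+# curl's default User-Agent (curl/<version>) doesn't contain "Mozilla", so the
+# stock botPolicies.json never routes the request to a challenge page.
+curl -i http://domain.name
+
+# Same idea with an explicit, obviously-not-a-browser UA, for clients whose
+# default UA happens to be Mozilla-flavoured.
+curl -i -A "definitely-not-a-browser/1.0" http://domain.name
+```
+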
+This applies to your normal browser as well: give it a user agent that doesn't contain "Mozilla"
+or any of the other terms in the file and you won't have any problems.
+
+I was expecting a much more involved workaround for dealing with this piece of shit, but no, all
+you have to do is give it a UA that doesn't contain a few keywords.

diff --git a/_zola/templates/404.html b/_zola/templates/404.html
new file mode 100644
index 0000000..3d84352
--- /dev/null
+++ b/_zola/templates/404.html
@@ -0,0 +1,10 @@
+{% extends "index.html" %}
+
+{% block title %}
+wanderlost - 404
+{% endblock title %}
+
+{% block content %}
+

it's OVER

+

page not found

+{% endblock content %} diff --git a/blog/2025-01-24-new-blog/index.xhtml b/blog/2025-01-24-new-blog/index.html similarity index 100% rename from blog/2025-01-24-new-blog/index.xhtml rename to blog/2025-01-24-new-blog/index.html diff --git a/blog/2025-02-04-servers-up/index.xhtml b/blog/2025-02-04-servers-up/index.html similarity index 100% rename from blog/2025-02-04-servers-up/index.xhtml rename to blog/2025-02-04-servers-up/index.html diff --git a/blog/2025-03-24-the-internet-sucks/index.xhtml b/blog/2025-03-24-the-internet-sucks/index.html similarity index 100% rename from blog/2025-03-24-the-internet-sucks/index.xhtml rename to blog/2025-03-24-the-internet-sucks/index.html diff --git a/blog/2025-04-13-xhtml-is-good-actually/index.xhtml b/blog/2025-04-13-xhtml-is-good-actually/index.html similarity index 100% rename from blog/2025-04-13-xhtml-is-good-actually/index.xhtml rename to blog/2025-04-13-xhtml-is-good-actually/index.html diff --git a/blog/2025-04-16-anubis-is-a-joke/index.html b/blog/2025-04-16-anubis-is-a-joke/index.html new file mode 100644 index 0000000..cc015ea --- /dev/null +++ b/blog/2025-04-16-anubis-is-a-joke/index.html @@ -0,0 +1,66 @@ + + + + + + + + +wanderlost - Anubis is a joke + + + + + + + + + + +
+
+ +
+

Anubis is a joke

+ + +

Over the past few months, a lot of people have turned to Anubis by Xe Iaso to try to protect their
sites, primarily Git forges and alternative frontends, against AI scraping.

+

Anubis is a new PoW captcha "solution" that (allegedly) keeps out scrapers by slowing down your
browsing and forcing you to enable JavaScript to pass a challenge before you can view the site.
Once it's wasted a few seconds of your time and made you reevaluate the worth of whatever you were
visiting, the stupid anime girl (previously AI generated) it shows you gives a smile and you're on
your way. This challenge will only work on Chromium and its Google-funded controlled opposition,
Firefox. Basilisk does seem to work, though with broken CSS. It doesn't even work on Safari
(allegedly, I don't own an iToy to test this with), and no other browser works with it (until you
read the next section).

+

There's one small problem with Anubis though. By default (and no installation I've checked changes
this), Anubis will, at the time of writing, only present a challenge to User-Agents containing
"Mozilla" and some obvious scraper agents. You can check this in /data/botPolicies.json.

+

This means that all one of those evil scrapers Anubis is supposed to protect against has to do to
bypass it is not use one of those User-Agents. It also means that you, too, can completely bypass
it, since I know it's been annoying a lot of people lately. You can curl a site running the default
config (which is most of them) and it won't serve an Anubis challenge; it'll just show you the site
in its original form. No special options, no custom User-Agent, just curl http://domain.name and
it'll let you through. This applies to your normal browser as well: give it a user agent that
doesn't contain "Mozilla" or any of the other terms in the file and you won't have any problems.

+

I was expecting a much more involved workaround for dealing with this piece of shit, but no, all
you have to do is give it a UA that doesn't contain a few keywords.

+ +
+ + +
+ + diff --git a/blog/404.html b/blog/404.html new file mode 100644 index 0000000..7256691 --- /dev/null +++ b/blog/404.html @@ -0,0 +1,40 @@ + + + + + + + + +wanderlost - 404 + + + + + + + + + + +
+
+ +

it's OVER

+

page not found

+ +
+ + diff --git a/blog/404.xhtml b/blog/404.xhtml deleted file mode 100644 index f8414f0..0000000 --- a/blog/404.xhtml +++ /dev/null @@ -1,3 +0,0 @@ - -404 Not Found -

404 Not Found

diff --git a/blog/atom.xml b/blog/atom.xml index 4079eb6..52def07 100644 --- a/blog/atom.xml +++ b/blog/atom.xml @@ -4,8 +4,48 @@ Zola - 2025-04-13T00:00:00+00:00 + 2025-04-16T00:00:00+00:00 /blog/atom.xml + + Anubis is a joke + 2025-04-16T00:00:00+00:00 + 2025-04-16T00:00:00+00:00 + + + + + wanderlost + + + + + + /blog/2025-04-16-anubis-is-a-joke/ + + <p>Over the past few months, a lot of people have turned to Anubis by Xe Iaso for trying to protect +their sites, primarily Git forges and alternative frontends, against AI scraping.</p> +<p>Anubis is a new PoW captcha "solution" that (allegedly) holds out scrapers by slowing down your +browsing and forcing you to enable JavaScript to pass a challenge to view the site. Once it's wasted +a few seconds of your time and made you reevaluate the worth of whatever you were visiting, the +stupid anime girl (previously AI generated) it shows you give a smile and you're on your way. This +challenge only will work on Chromium and its Google-funded controlled opposition, Firefox. Basilisk +does seem to work, though with broken CSS. It doesn't even work on Safari (allegedly, I don't own an +iToy to test this with) and no other browser (until you read the next section) works on this.</p> +<p>There's one small problem to Anubis though. By default (which no installation I've checked changes), +Anubis will only present a challenge to User-Agents with "Mozilla" and some obvious scraper agents, +at the time of me writing this. You can check this in /data/botPolicies.json.</p> +<p>This means all one of those evil scrapers Anubis is supposed to protect against have to do to bypass +Anubis is not use one of these User-Agents. It also means that you too can completely bypass this as +I know it's been annoying a lot of people lately. You can curl a site using the default config (most +of them), and it won't give an Anubis challenge, it'll just show you the site in its original +form. No special options, no custom User-Agent, just curl http://domain.name and it'll let you +through. This is applicable to your normal browser as well, just give it a user agent that doesn't +contain "Mozilla" or any of the other terms in the file and you won't have any problems.</p> +<p>I was expecting a much more involved workaround to dealing with this piece of shit but no, all you +have to do is give it a UA not containing some keywords.</p> + + + XHTML is good, actually 2025-04-13T00:00:00+00:00 diff --git a/blog/index.xhtml b/blog/index.html similarity index 86% rename from blog/index.xhtml rename to blog/index.html index 497487b..cfdd642 100644 --- a/blog/index.xhtml +++ b/blog/index.html @@ -34,6 +34,14 @@
+

+ 2025-04-16 - + + Anubis is a joke + +

+

an easily bypassable one, and not actually protecting your site (against anything other than really low effort scrapes)

+

2025-04-13 - diff --git a/blog/rss.xml b/blog/rss.xml index 2dcbf14..81f0d4a 100644 --- a/blog/rss.xml +++ b/blog/rss.xml @@ -7,7 +7,36 @@ Zola en - Sun, 13 Apr 2025 00:00:00 +0000 + Wed, 16 Apr 2025 00:00:00 +0000 + + Anubis is a joke + Wed, 16 Apr 2025 00:00:00 +0000 + wanderlost + /blog/2025-04-16-anubis-is-a-joke/ + /blog/2025-04-16-anubis-is-a-joke/ + <p>Over the past few months, a lot of people have turned to Anubis by Xe Iaso for trying to protect +their sites, primarily Git forges and alternative frontends, against AI scraping.</p> +<p>Anubis is a new PoW captcha "solution" that (allegedly) holds out scrapers by slowing down your +browsing and forcing you to enable JavaScript to pass a challenge to view the site. Once it's wasted +a few seconds of your time and made you reevaluate the worth of whatever you were visiting, the +stupid anime girl (previously AI generated) it shows you give a smile and you're on your way. This +challenge only will work on Chromium and its Google-funded controlled opposition, Firefox. Basilisk +does seem to work, though with broken CSS. It doesn't even work on Safari (allegedly, I don't own an +iToy to test this with) and no other browser (until you read the next section) works on this.</p> +<p>There's one small problem to Anubis though. By default (which no installation I've checked changes), +Anubis will only present a challenge to User-Agents with "Mozilla" and some obvious scraper agents, +at the time of me writing this. You can check this in /data/botPolicies.json.</p> +<p>This means all one of those evil scrapers Anubis is supposed to protect against have to do to bypass +Anubis is not use one of these User-Agents. It also means that you too can completely bypass this as +I know it's been annoying a lot of people lately. You can curl a site using the default config (most +of them), and it won't give an Anubis challenge, it'll just show you the site in its original +form. No special options, no custom User-Agent, just curl http://domain.name and it'll let you +through. This is applicable to your normal browser as well, just give it a user agent that doesn't +contain "Mozilla" or any of the other terms in the file and you won't have any problems.</p> +<p>I was expecting a much more involved workaround to dealing with this piece of shit but no, all you +have to do is give it a UA not containing some keywords.</p> + + XHTML is good, actually Sun, 13 Apr 2025 00:00:00 +0000 diff --git a/blog/sitemap.xml b/blog/sitemap.xml index 7a2f051..7faa114 100644 --- a/blog/sitemap.xml +++ b/blog/sitemap.xml @@ -19,4 +19,8 @@ /blog/2025-04-13-xhtml-is-good-actually/ 2025-04-13 + + /blog/2025-04-16-anubis-is-a-joke/ + 2025-04-16 +