From 3684f20b3bac39cd5bccf59a54c78e3758761479 Mon Sep 17 00:00:00 2001
From: peter <2031-peter-@users.noreply.git.fairkom.net>
Date: Fri, 19 May 2023 13:15:35 +0000
Subject: [PATCH] Add xmldsig signature verification

---
 README.md        |  8 ++++++-
 analysis.py      |  2 +-
 main.py          | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 requirements.txt |  1 +
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 4f54b6c..3c9cba0 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@ Works perfectly in combination with the [Keycloak home IdP discovery plugin](htt
 
 ## Requirements
 * Python3.10+
+* [python-keycloak](https://pypi.org/project/python-keycloak/)
+* [pyXMLSecurity](https://github.com/IdentityPython/pyXMLSecurity) for XML Signature verification
 * (optional) an external database, to keep track of the syncs, if there is no external DB, sqlite has to be selected 
 
 ## Use
@@ -24,13 +26,17 @@ You may want to run the script once a day to update the IdPs, e.g. via cron or s
 
 ## Configuration
 ### Environment variables
- - `SAML2_METADATA_URL` (e.g. `https://example.org/metadata.xml`) 
+ - `SAML2_METADATA_URL` (URL to SAML 2.0 Metadata document)
+ - `SAML2_METADATA_CRT_FILE` (path to the X.509 certificate to use for XML Signature verification of the Metadata)
+ - `SAML2_METADATA_INSECURE` (set to any non-empty value to skip both XML Signature verification and the `validUntil` expiration check; NOT RECOMMENDED)
  - `KEYCLOAK_URL` (URL to Keycloak instance)
  - `KEYCLOAK_REALM` (Keycloak realm)
  - `KEYCLOAK_USER` (Keycloak user)
  - `KEYCLOAK_PASSWORD` (Keycloak users password)
  - `DB_TYPE` (`postresql`, `mysql` or `sqlite`)
 
+> Note that *either* `SAML2_METADATA_CRT_FILE` (to enable XML Signature verification of signed SAML 2.0 Metadata) *or* `SAML2_METADATA_INSECURE` (to skip it) must be set. If both are set, `SAML2_METADATA_INSECURE` is ignored.
+
 For external databases:
  - `DB_USER` (database user)
  - `DB_PASSWORD` (database password)
diff --git a/analysis.py b/analysis.py
index d6c15ee..eaa932f 100644
--- a/analysis.py
+++ b/analysis.py
@@ -39,7 +39,7 @@ def get_metadata():
     if not url:
         sys.exit("Environment variable SAML2_METADATA_URL not set.")
     r = requests.get(url, allow_redirects=True)
-    parser = etree.XMLParser()
+    parser = etree.XMLParser(resolve_entities=False)
     root = etree.fromstring(r.content, parser)
     return root
 
diff --git a/main.py b/main.py
index eac1b0a..4f59e00 100644
--- a/main.py
+++ b/main.py
@@ -20,7 +20,9 @@ import hashlib
 import json
 import os
 import sqlite3
-from datetime import datetime
+import sys
+import xmlsec
+from datetime import datetime, timezone
 
 import database
 
@@ -52,6 +54,11 @@ class Metadata_importer():
 
     def __get_settings(self):
         self.metadata_feed_url = os.getenv('SAML2_METADATA_URL')
+        self.metadata_feed_crt = os.getenv('SAML2_METADATA_CRT_FILE')
+        self.metadata_feed_insecure = os.getenv('SAML2_METADATA_INSECURE', False)
+        if self.metadata_feed_crt and self.metadata_feed_insecure:
+            logger.warning("Ignoring SAML2_METADATA_INSECURE because SAML2_METADATA_CRT_FILE is set.")
+            self.metadata_feed_insecure = False
         self.keycloak_url = os.getenv('KEYCLOAK_URL')
         self.keycloak_realm = os.getenv('KEYCLOAK_REALM')
         self.keycloak_user = os.getenv('KEYCLOAK_USER')
@@ -129,15 +136,59 @@ class Metadata_importer():
                 }
         return result
 
+    def validate_metadata_signature(self, root):
+        """Verify root's XML Signature against the SAML2_METADATA_CRT_FILE certificate; exit the process on failure."""
+        try:
+            xmlsec.verified(root, self.metadata_feed_crt)
+        except xmlsec.exceptions.XMLSigException:
+            failure = "XML Signature verification failed"
+            logger.error(failure)
+            sys.exit(failure)
+
+    def parse_date(self, date):
+        """Parse an ISO 8601 timestamp string into an aware datetime; a value without UTC offset is read as UTC."""
+        try:
+            parsed = datetime.fromisoformat(date)
+        except ValueError:  # e.g. a trailing "Z", which fromisoformat only accepts from Python 3.11 on
+            parsed = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z")
+        return parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed
+
+    def check_expiration(self, date):
+        """Exit the process if the validUntil timestamp (string or None) lies in the past."""
+        if self.metadata_feed_insecure or not date:
+            reason = "Skipping validUntil check" if self.metadata_feed_insecure else "Metadata has no 'validUntil'"
+            logger.warning(reason)
+            return
+        now = datetime.now(timezone.utc)
+        valid_until = self.parse_date(date)
+        if now > valid_until:  # compare instants: timedelta.days > 0 accepted metadata up to 24h past expiry
+            msg = "Metadata expired at {} (now is {})".format(valid_until, now)
+            logger.error(msg)
+            sys.exit(msg)
+
+    def check_metadata(self, root):  # exits on expired, unverifiable or signature-stripped metadata; otherwise returns root
+        self.check_expiration(root.get("validUntil"))
+        signed = root.xpath("/md:EntitiesDescriptor/ds:Signature", namespaces={"md": "urn:oasis:names:tc:SAML:2.0:metadata", "ds": "http://www.w3.org/2000/09/xmldsig#"})
+        if signed and self.metadata_feed_crt:
+            self.validate_metadata_signature(root)
+        elif self.metadata_feed_crt or (signed and not self.metadata_feed_insecure):  # fail closed: stripping the signature must not bypass a configured certificate
+            msg = "Refusing unsigned metadata because SAML2_METADATA_CRT_FILE is set." if self.metadata_feed_crt else "Set SAML2_METADATA_CRT_FILE to enable XML Signature verification (strongly recommended) or SAML2_METADATA_INSECURE to skip it."
+            logger.error(msg)
+            sys.exit(msg)
+        else:
+            logger.warning("Skipping XML Signature verification")
+        return root
+
     def get_metadata(self):
         requests.packages.urllib3.util.connection.HAS_IPV6 = False
         url = self.metadata_feed_url
-        header = {"user-agent": "curl/7.85.0"}
+        header = {"user-agent": "keycloak-metadata-importer"}
 
         r = requests.get(url, allow_redirects=True, headers=header)
-        parser = etree.XMLParser()
+        r.raise_for_status()
+        parser = etree.XMLParser(resolve_entities=False)
         root = etree.fromstring(r.content, parser)
-        return root
+        return self.check_metadata(root)
 
     def get_entity_descriptors(self):
         root = self.get_metadata()
diff --git a/requirements.txt b/requirements.txt
index 9f9f6e0..beede3f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 ecs_logging
 lxml
 peewee
+pyXMLSecurity
 python-keycloak
 requests
-- 
GitLab