K8s: Caddy error when generating certs

Hello.

We are setting up Open edX using Tutor in a self-hosted Kubernetes cluster using AWS EC2 machines.
When we run the Caddy pod first using tutor k8s start caddy, it is unable to generate some certificates due to various errors. The pod logs are listed below.

{"level":"info","ts":1725531770.4546814,"logger":"tls.obtain","msg":"acquiring lock","identifier":"files.openedx.enchatted.com"}
{"level":"info","ts":1725531770.4614372,"logger":"tls.obtain","msg":"acquiring lock","identifier":"preview.openedx.enchatted.com"}
{"level":"info","ts":1725531770.462866,"logger":"tls.obtain","msg":"lock acquired","identifier":"openedx.enchatted.com"}
{"level":"info","ts":1725531770.4632387,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"openedx.enchatted.com"}
{"level":"info","ts":1725531770.4674988,"logger":"tls.obtain","msg":"lock acquired","identifier":"studio.openedx.enchatted.com"}
{"level":"info","ts":1725531770.467891,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"studio.openedx.enchatted.com"}
{"level":"info","ts":1725531770.4752223,"logger":"tls.obtain","msg":"lock acquired","identifier":"files.openedx.enchatted.com"}
{"level":"info","ts":1725531770.4756598,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"files.openedx.enchatted.com"}
{"level":"info","ts":1725531770.4813,"logger":"tls.obtain","msg":"lock acquired","identifier":"preview.openedx.enchatted.com"}
{"level":"info","ts":1725531770.4815571,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"preview.openedx.enchatted.com"}
{"level":"warn","ts":1725531800.4577954,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725531830.7099082,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725531860.9610581,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725531860.9613905,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"minio.openedx.enchatted.com","issuer":"acme-v02.api.letsencrypt.org-directory","error":"registering account [] with server: provisioning client: performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725531860.962882,"logger":"http","msg":"missing email address for ZeroSSL; it is strongly recommended to set one for next time"}
{"level":"warn","ts":1725531890.9621415,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725531890.963515,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"minio.openedx.enchatted.com","issuer":"acme.zerossl.com-v2-DV90","error":"account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout"}
{"level":"error","ts":1725531890.9637241,"logger":"tls.obtain","msg":"will retry","error":"[minio.openedx.enchatted.com] Obtain: account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout","attempt":1,"retrying_in":60,"elapsed":120.525664397,"max_duration":2592000}
{"level":"warn","ts":1725531921.2136161,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"info","ts":1725531950.9650018,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"minio.openedx.enchatted.com"}
{"level":"warn","ts":1725531951.4654934,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725531951.4655674,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"apps.openedx.enchatted.com","issuer":"acme-v02.api.letsencrypt.org-directory","error":"registering account [] with server: provisioning client: performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725531951.4658325,"logger":"http","msg":"missing email address for ZeroSSL; it is strongly recommended to set one for next time"}
{"level":"warn","ts":1725531981.4663975,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725531981.4664683,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"apps.openedx.enchatted.com","issuer":"acme.zerossl.com-v2-DV90","error":"account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout"}
{"level":"error","ts":1725531981.4665303,"logger":"tls.obtain","msg":"will retry","error":"[apps.openedx.enchatted.com] Obtain: account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout","attempt":1,"retrying_in":60,"elapsed":211.022745085,"max_duration":2592000}
{"level":"warn","ts":1725532011.7182052,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"info","ts":1725532041.466975,"logger":"tls.obtain","msg":"obtaining certificate","identifier":"apps.openedx.enchatted.com"}
{"level":"warn","ts":1725532041.9692352,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725532041.9693525,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"openedx.enchatted.com","issuer":"acme-v02.api.letsencrypt.org-directory","error":"registering account [] with server: provisioning client: performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725532041.9695723,"logger":"http","msg":"missing email address for ZeroSSL; it is strongly recommended to set one for next time"}
{"level":"error","ts":1725532071.9697645,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"openedx.enchatted.com","issuer":"acme.zerossl.com-v2-DV90","error":"account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout"}
{"level":"warn","ts":1725532071.9697728,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725532071.96989,"logger":"tls.obtain","msg":"will retry","error":"[openedx.enchatted.com] Obtain: account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout","attempt":1,"retrying_in":60,"elapsed":301.506977629,"max_duration":2592000}
{"level":"warn","ts":1725532102.2220774,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}

This results in all subsequent pods and jobs failing (such as minio) since there are no secure endpoints for them.
I have also come across another thread about this issue (Deploying in Kubernetes env using tutor in Oracle Cloud), which was never resolved and was closed due to inactivity.
What could be the cause of this? Any help will be greatly appreciated.

Thanks.

Hi @Retr0,
How are you starting the platform? You should do tutor k8s start or tutor k8s launch -I, so all containers are started. If you only start Caddy without the other pods it may fail to validate the internal subdomains.

Hello, thanks for the quick reply!

The reason I am starting Caddy first is that the Tutor documentation here says to do so. Regardless, I have also tried launching it normally using tutor k8s start multiple times, to no avail.

All pods, services, and everything else seem to be running OK, but the Caddy service is still failing to get an external IP, and the pod logs show the same errors I listed above.

Ok. Tutor k8s will create by default a load balancer for each namespace. Check that all load balancers are up and running. Then point your DNS to the public address of the load balancer. This should make it work.

As I said in my replies, the Caddy pod, which ultimately provides the service with the data it needs to function, is failing due to the errors I listed above.

Yes, Tutor does create the Load Balancer but there is no external IP assigned since Caddy is not running properly.

I tried fully resetting our cluster and setting it up again from a clean slate.
No luck in fixing this; I still got similar errors while generating certs.

{"level":"warn","ts":1725963625.9582021,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725963656.2085097,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725963686.4600103,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725963686.460208,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"minio.openedx.enchatted.com","issuer":"acme-v02.api.letsencrypt.org-directory","error":"registering account [] with server: provisioning client: performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"warn","ts":1725963686.4604642,"logger":"http","msg":"missing email address for ZeroSSL; it is strongly recommended to set one for next time"}
{"level":"warn","ts":1725963716.4607253,"logger":"http.acme_client","msg":"HTTP request failed; retrying","url":"https://acme-v02.api.letsencrypt.org/directory","error":"performing request: Get \"https://acme-v02.api.letsencrypt.org/directory\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
{"level":"error","ts":1725963716.4607913,"logger":"tls.obtain","msg":"could not get certificate from issuer","identifier":"minio.openedx.enchatted.com","issuer":"acme.zerossl.com-v2-DV90","error":"account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout"}
{"level":"error","ts":1725963716.460858,"logger":"tls.obtain","msg":"will retry","error":"[minio.openedx.enchatted.com] Obtain: account pre-registration callback: performing EAB credentials request: Post \"https://api.zerossl.com/acme/eab-credentials-email\": dial tcp: lookup api.zerossl.com: i/o timeout","attempt":1,"retrying_in":60,"elapsed":120.510339967,"max_duration":2592000}