server: update health check endpoint to query storage periodically
Instead of querying the storage every time a health check is performed query it periodically and save the result.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
@@ -20,31 +22,85 @@ import (
|
||||
"github.com/dexidp/dex/storage"
|
||||
)
|
||||
|
||||
func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
|
||||
start := s.now()
|
||||
err := func() error {
|
||||
// Instead of trying to introspect health, just try to use the underlying storage.
|
||||
a := storage.AuthRequest{
|
||||
ID: storage.NewID(),
|
||||
ClientID: storage.NewID(),
|
||||
// newHealthChecker returns the healthz handler. The handler runs until the
|
||||
// provided context is canceled.
|
||||
func (s *Server) newHealthChecker(ctx context.Context) http.Handler {
|
||||
h := &healthChecker{s: s}
|
||||
|
||||
// Set a short expiry so if the delete fails this will be cleaned up quickly by garbage collection.
|
||||
Expiry: s.now().Add(time.Minute),
|
||||
}
|
||||
// Perform one health check synchronously so the returned handler returns
|
||||
// valid data immediately.
|
||||
h.runHealthCheck()
|
||||
|
||||
if err := s.storage.CreateAuthRequest(a); err != nil {
|
||||
return fmt.Errorf("create auth request: %v", err)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Second * 15):
|
||||
}
|
||||
h.runHealthCheck()
|
||||
}
|
||||
if err := s.storage.DeleteAuthRequest(a.ID); err != nil {
|
||||
return fmt.Errorf("delete auth request: %v", err)
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
return h
|
||||
}
|
||||
|
||||
t := s.now().Sub(start)
|
||||
// healthChecker periodically performs health checks on server dependenices.
|
||||
// Currently, it only checks that the storage layer is avialable.
|
||||
type healthChecker struct {
|
||||
s *Server
|
||||
|
||||
// Result of the last health check: any error and the amount of time it took
|
||||
// to query the storage.
|
||||
mu sync.RWMutex
|
||||
// Guarded by the mutex
|
||||
err error
|
||||
passed time.Duration
|
||||
}
|
||||
|
||||
// runHealthCheck performs a single health check and makes the result available
|
||||
// for any clients performing and HTTP request against the healthChecker.
|
||||
func (h *healthChecker) runHealthCheck() {
|
||||
t := h.s.now()
|
||||
err := checkStorageHealth(h.s.storage, h.s.now)
|
||||
passed := h.s.now().Sub(t)
|
||||
if err != nil {
|
||||
s.logger.Errorf("Storage health check failed: %v", err)
|
||||
s.renderError(w, http.StatusInternalServerError, "Health check failed.")
|
||||
h.s.logger.Errorf("Storage health check failed: %v", err)
|
||||
}
|
||||
|
||||
// Make sure to only hold the mutex to access the fields, and not while
|
||||
// we're querying the storage object.
|
||||
h.mu.Lock()
|
||||
h.err = err
|
||||
h.passed = passed
|
||||
h.mu.Unlock()
|
||||
}
|
||||
|
||||
func checkStorageHealth(s storage.Storage, now func() time.Time) error {
|
||||
a := storage.AuthRequest{
|
||||
ID: storage.NewID(),
|
||||
ClientID: storage.NewID(),
|
||||
|
||||
// Set a short expiry so if the delete fails this will be cleaned up quickly by garbage collection.
|
||||
Expiry: now().Add(time.Minute),
|
||||
}
|
||||
|
||||
if err := s.CreateAuthRequest(a); err != nil {
|
||||
return fmt.Errorf("create auth request: %v", err)
|
||||
}
|
||||
if err := s.DeleteAuthRequest(a.ID); err != nil {
|
||||
return fmt.Errorf("delete auth request: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *healthChecker) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
h.mu.RLock()
|
||||
err := h.err
|
||||
t := h.passed
|
||||
h.mu.RUnlock()
|
||||
|
||||
if err != nil {
|
||||
h.s.renderError(w, http.StatusInternalServerError, "Health check failed.")
|
||||
return
|
||||
}
|
||||
fmt.Fprintf(w, "Health check passed in %s", t)
|
||||
|
||||
Reference in New Issue
Block a user