Monitoring

The EaaS gateway, i.e. the resource (cluster) manager, provides a monitoring API to retrieve statistics about its current state.

Usage Examples

Note

All API calls should include an additional HTTP header X-Admin-Access-Token: <ACCESS-TOKEN>, where <ACCESS-TOKEN> is the access token defined in eaas-config.yaml as clustermanager.admin_api_access_token.

Retrieve all active cluster names:

GET /eaas/api/v1/clusters

Retrieve current state of a specific cluster:

GET /eaas/api/v1/clusters/<CLUSTER-NAME>

Note

The following examples retrieve subsets of the cluster state.

Retrieve the current state of a node provider:

GET /eaas/api/v1/clusters/<CLUSTER-NAME>/providers/<PROVIDER-NAME>

Retrieve stats of a provider:

GET /eaas/api/v1/clusters/<CLUSTER-NAME>/providers/<PROVIDER-NAME>/metrics

Retrieve information about a provider’s node pool:

GET /eaas/api/v1/clusters/<CLUSTER-NAME>/providers/<PROVIDER-NAME>/node-pool

Filter

The API also allows filtering a provider request down to specific subsets of the provider's state by means of the fields query parameter.

To retrieve only config and metrics:

GET /eaas/api/v1/clusters/<CLUSTER-NAME>/providers/<PROVIDER-NAME>?fields=config,metrics

To retrieve the provider state without its config (a leading ! excludes the named field):

GET /eaas/api/v1/clusters/<CLUSTER-NAME>/providers/<PROVIDER-NAME>?fields=!config

Examples

A full example of an idle Google Cloud EaaS gateway:

{
    "__timestamp": "2017-08-02T15:22:30.181",
    "__resource_type": "ClusterManager",
    "config": {
        "__timestamp": "2017-08-02T15:22:30.184",
        "__resource_type": "ClusterManagerConfig",
        "name": "default",
        "providers": [
            {
                "__timestamp": "2017-08-02T15:22:30.186",
                "__resource_type": "ResourceProviderConfig",
                "name": "default",
                "type": "gce",
                "deferred_allocations_gc_interval": "PT30S",
                "labels": {
                    "rank": "1"
                },
                "request_history": {
                    "update_interval": "PT30S",
                    "max_request_age": "PT5M",
                    "max_num_requests": 128
                },
                "preallocation": {
                    "request_history_multiplier": 0.5,
                    "min_bound": {
                        "cpu": "0m",
                        "memory": "0MB"
                    },
                    "max_bound": {
                        "cpu": "2147483647m",
                        "memory": "2147483647MB"
                    }
                },
                "node_allocator": {
                    "__timestamp": "2017-08-02T15:22:30.212",
                    "__resource_type": "NodeAllocatorConfigGCE",
                    "application_name": "eaas",
                    "project_id": "eaas-142812",
                    "zone_name": "europe-west1-d",
                    "network_name": "eaas-network",
                    "node_name_prefix": "emucomp-eu-",
                    "credentials_file": "/home/bwfla/.bwFLA/eaas-e2ca16db111c.json",
                    "vm": {
                        "machine_type": "n1-highcpu-8",
                        "persistent_disk": {
                            "type": "pd-standard",
                            "size": 10,
                            "image_url": "projects/eaas-142812/global/images/eaas-emucomp-20170406"
                        },
                        "accelerators": [],
                        "boot_poll_interval": "PT3S",
                        "boot_poll_interval_delta": "PT2S",
                        "max_num_boot_polls": 50
                    },
                    "api": {
                        "poll_interval": "PT2S",
                        "poll_interval_delta": "PT1S",
                        "retry_interval": "PT1S",
                        "retry_interval_delta": "PT2S",
                        "max_num_retries": 3
                    },
                    "healthcheck": {
                        "url_template": "http://{{address}}/emucomp/health",
                        "connect_timeout": "PT10S",
                        "read_timeout": "PT5S",
                        "failure_timeout": "PT2M",
                        "interval": "PT30S",
                        "num_parallel_requests": 4
                    }
                },
                "poolscaler": {
                    "__timestamp": "2017-08-02T15:22:30.218",
                    "__resource_type": "HomogeneousNodePoolScalerConfig",
                    "min_poolsize": 1,
                    "max_poolsize": 25,
                    "scaleup": {
                        "max_poolsize_adjustment": 10
                    },
                    "scaledown": {
                        "max_poolsize_adjustment": 20,
                        "node_warmup_period": "PT10M",
                        "node_cooldown_period": "PT15M"
                    },
                    "pool_scaling_interval": "PT1M"
                }
            }
        ]
    },
    "num_providers": 1,
    "providers": [
        {
            "__timestamp": "2017-08-02T15:22:30.223",
            "__resource_type": "ResourceProvider",
            "name": "default",
            "type": "gce",
            "config": {
                "__timestamp": "2017-08-02T15:22:30.223",
                "__resource_type": "ResourceProviderConfig",
                "name": "default",
                "type": "gce",
                "deferred_allocations_gc_interval": "PT30S",
                "labels": {
                    "rank": "1"
                },
                "request_history": {
                    "update_interval": "PT30S",
                    "max_request_age": "PT5M",
                    "max_num_requests": 128
                },
                "preallocation": {
                    "request_history_multiplier": 0.5,
                    "min_bound": {
                        "cpu": "0m",
                        "memory": "0MB"
                    },
                    "max_bound": {
                        "cpu": "2147483647m",
                        "memory": "2147483647MB"
                    }
                },
                "node_allocator": {
                    "__timestamp": "2017-08-02T15:22:30.224",
                    "__resource_type": "NodeAllocatorConfigGCE",
                    "application_name": "eaas",
                    "project_id": "eaas-142812",
                    "zone_name": "europe-west1-d",
                    "network_name": "eaas-network",
                    "node_name_prefix": "emucomp-eu-",
                    "credentials_file": "/home/bwfla/.bwFLA/eaas-e2ca16db111c.json",
                    "vm": {
                        "machine_type": "n1-highcpu-8",
                        "persistent_disk": {
                            "type": "pd-standard",
                            "size": 10,
                            "image_url": "projects/eaas-142812/global/images/eaas-emucomp-20170406"
                        },
                        "accelerators": [],
                        "boot_poll_interval": "PT3S",
                        "boot_poll_interval_delta": "PT2S",
                        "max_num_boot_polls": 50
                    },
                    "api": {
                        "poll_interval": "PT2S",
                        "poll_interval_delta": "PT1S",
                        "retry_interval": "PT1S",
                        "retry_interval_delta": "PT2S",
                        "max_num_retries": 3
                    },
                    "healthcheck": {
                        "url_template": "http://{{address}}/emucomp/health",
                        "connect_timeout": "PT10S",
                        "read_timeout": "PT5S",
                        "failure_timeout": "PT2M",
                        "interval": "PT30S",
                        "num_parallel_requests": 4
                    }
                },
                "poolscaler": {
                    "__timestamp": "2017-08-02T15:22:30.225",
                    "__resource_type": "HomogeneousNodePoolScalerConfig",
                    "min_poolsize": 1,
                    "max_poolsize": 25,
                    "scaleup": {
                        "max_poolsize_adjustment": 10
                    },
                    "scaledown": {
                        "max_poolsize_adjustment": 20,
                        "node_warmup_period": "PT10M",
                        "node_cooldown_period": "PT15M"
                    },
                    "pool_scaling_interval": "PT1M"
                }
            },
            "metrics": {
                "__timestamp": "2017-08-02T15:22:30.227",
                "__resource_type": "ResourceProviderMetrics",
                "num_requests": 1,
                "num_requests_deferred": 1,
                "num_requests_expired": 0,
                "num_requests_failed": 0
            },
            "node_pool": {
                "__timestamp": "2017-08-02T15:22:30.234",
                "__resource_type": "NodePool",
                "is_homogeneous": true,
                "capacity": {
                    "cpu": "8000m",
                    "memory": "7373MB"
                },
                "pending": {
                    "cpu": "0m",
                    "memory": "0MB"
                },
                "num_used_nodes": 0,
                "num_unused_nodes": 1,
                "num_healthy_nodes": 1,
                "num_unhealthy_nodes": 0,
                "num_nodes": 1,
                "nodes": [
                    {
                        "__timestamp": "2017-08-02T15:22:30.238",
                        "__resource_type": "Node",
                        "id": "104.155.67.78",
                        "healthy": true,
                        "used": false,
                        "uptime": "PT8M5.842S",
                        "unused_time": "PT4M50.761S",
                        "capacity": {
                            "cpu": "8000m",
                            "memory": "7373MB"
                        }
                    }
                ]
            },
            "node_allocator": {
                "__timestamp": "2017-08-02T15:22:30.242",
                "__resource_type": "NodeAllocatorGCE",
                "config": {
                    "__timestamp": "2017-08-02T15:22:30.243",
                    "__resource_type": "NodeAllocatorConfigGCE",
                    "application_name": "eaas",
                    "project_id": "eaas-142812",
                    "zone_name": "europe-west1-d",
                    "network_name": "eaas-network",
                    "node_name_prefix": "emucomp-eu-",
                    "credentials_file": "/home/bwfla/.bwFLA/eaas-e2ca16db111c.json",
                    "vm": {
                        "machine_type": "n1-highcpu-8",
                        "persistent_disk": {
                            "type": "pd-standard",
                            "size": 10,
                            "image_url": "projects/eaas-142812/global/images/eaas-emucomp-20170406"
                        },
                        "accelerators": [],
                        "boot_poll_interval": "PT3S",
                        "boot_poll_interval_delta": "PT2S",
                        "max_num_boot_polls": 50
                    },
                    "api": {
                        "poll_interval": "PT2S",
                        "poll_interval_delta": "PT1S",
                        "retry_interval": "PT1S",
                        "retry_interval_delta": "PT2S",
                        "max_num_retries": 3
                    },
                    "healthcheck": {
                        "url_template": "http://{{address}}/emucomp/health",
                        "connect_timeout": "PT10S",
                        "read_timeout": "PT5S",
                        "failure_timeout": "PT2M",
                        "interval": "PT30S",
                        "num_parallel_requests": 4
                    }
                },
                "node_capacity": {
                    "cpu": "8000m",
                    "memory": "7373MB"
                },
                "num_nodes": 1,
                "nodes": [
                    {
                        "__timestamp": "2017-08-02T15:22:30.245",
                        "__resource_type": "NodeInfo",
                        "id": "104.155.67.78",
                        "state": "REACHABLE",
                        "healthcheck_url": "http://104.155.67.78/emucomp/health",
                        "metadata": [
                            {
                                "vm_name": "emucomp-eu-0f7a54d8-51ac-4189-9117-9aaba404a77a"
                            }
                        ]
                    }
                ],
                "num_vm_names": 1,
                "vm_names": [
                    "emucomp-eu-0f7a54d8-51ac-4189-9117-9aaba404a77a"
                ]
            },
            "resource_allocator": {
                "__timestamp": "2017-08-02T15:22:30.250",
                "__resource_type": "ResourceAllocator",
                "capacity": {
                    "cpu": "8000m",
                    "memory": "7373MB"
                },
                "free_resources": {
                    "cpu": "8000m",
                    "memory": "7373MB"
                },
                "used_resources": {
                    "cpu": "0m",
                    "memory": "0MB"
                },
                "num_allocations": 0,
                "num_nodes": 1,
                "nodes": [
                    {
                        "__timestamp": "2017-08-02T15:22:30.252",
                        "__resource_type": "NodeInfo",
                        "id": "104.155.67.78",
                        "free_resources": {
                            "cpu": "8000m",
                            "memory": "7373MB"
                        },
                        "num_allocations": 0,
                        "allocations": []
                    }
                ],
                "index": {
                    "__timestamp": "2017-08-02T15:22:30.253",
                    "__resource_type": "ResourceIndex",
                    "num_entries": 1,
                    "entries": [
                        {
                            "key": 34359738375373,
                            "num_nodes": 1,
                            "nodes": [
                                "104.155.67.78"
                            ]
                        }
                    ]
                }
            },
            "allocation_requests": {
                "__timestamp": "2017-08-02T15:22:30.255",
                "__resource_type": "AllocationRequestQueue",
                "resources_sum": {
                    "cpu": "0m",
                    "memory": "0MB"
                },
                "num_entries": 0,
                "entries": []
            }
        }
    ]
}

A provider example for an active blade gateway (currently with two active sessions):

{
    "__timestamp": "2017-08-03T14:13:53.798",
    "__resource_type": "ResourceProvider",
    "name": "fallback",
    "type": "blades",
    "metrics": {
        "__timestamp": "2017-08-03T14:13:53.798",
        "__resource_type": "ResourceProviderMetrics",
        "num_requests": 3,
        "num_requests_deferred": 1,
        "num_requests_expired": 0,
        "num_requests_failed": 0
    },
    "node_pool": {
        "__timestamp": "2017-08-03T14:13:53.798",
        "__resource_type": "NodePool",
        "is_homogeneous": true,
        "capacity": {
            "cpu": "16000m",
            "memory": "16384MB"
        },
        "pending": {
            "cpu": "0m",
            "memory": "0MB"
        },
        "num_used_nodes": 1,
        "num_unused_nodes": 0,
        "num_healthy_nodes": 1,
        "num_unhealthy_nodes": 0,
        "num_nodes": 1,
        "nodes": [
            {
                "__timestamp": "2017-08-03T14:13:53.798",
                "__resource_type": "Node",
                "id": "localhost:8080",
                "healthy": true,
                "used": true,
                "uptime": "PT6M39.666S",
                "unused_time": "PT0S",
                "capacity": {
                    "cpu": "16000m",
                    "memory": "16384MB"
                }
            }
        ]
    },
    "node_allocator": {
        "__timestamp": "2017-08-03T14:13:53.799",
        "__resource_type": "NodeAllocatorBLADES",
        "config": {
            "__timestamp": "2017-08-03T14:13:53.799",
            "__resource_type": "NodeAllocatorConfigBLADES",
            "healthcheck": {
                "url_template": "http://{{address}}/emucomp/health",
                "connect_timeout": "PT10S",
                "read_timeout": "PT5S",
                "failure_timeout": "PT2M",
                "interval": "PT30S",
                "num_parallel_requests": 4
            },
            "node_capacity": {
                "cpu": "16000m",
                "memory": "16384MB"
            },
            "node_addresses": [
                "localhost:8080"
            ]
        },
        "node_capacity": {
            "cpu": "16000m",
            "memory": "16384MB"
        },
        "num_used_nodes": 1,
        "used_nodes": [
            {
                "__timestamp": "2017-08-03T14:13:53.799",
                "__resource_type": "NodeInfo",
                "id": "localhost:8080",
                "state": "REACHABLE",
                "healthcheck_url": "http://localhost:8080/emucomp/health",
                "metadata": []
            }
        ],
        "num_free_nodes": 0,
        "free_nodes": [],
        "num_failed_nodes": 0,
        "failed_nodes": []
    },
    "resource_allocator": {
        "__timestamp": "2017-08-03T14:13:53.800",
        "__resource_type": "ResourceAllocator",
        "capacity": {
            "cpu": "16000m",
            "memory": "16384MB"
        },
        "free_resources": {
            "cpu": "14000m",
            "memory": "15360MB"
        },
        "used_resources": {
            "cpu": "2000m",
            "memory": "1024MB"
        },
        "num_allocations": 2,
        "num_nodes": 1,
        "nodes": [
            {
                "__timestamp": "2017-08-03T14:13:53.800",
                "__resource_type": "NodeInfo",
                "id": "localhost:8080",
                "free_resources": {
                    "cpu": "14000m",
                    "memory": "15360MB"
                },
                "num_allocations": 2,
                "allocations": [
                    {
                        "id": "d0f3b0f3-9d11-4475-8a25-252d30342588",
                        "spec": {
                            "cpu": "1000m",
                            "memory": "512MB"
                        }
                    },
                    {
                        "id": "4e932dc2-e537-4f0d-bcb1-ab31184d7fdd",
                        "spec": {
                            "cpu": "1000m",
                            "memory": "512MB"
                        }
                    }
                ]
            }
        ],
        "index": {
            "__timestamp": "2017-08-03T14:13:53.800",
            "__resource_type": "ResourceIndex",
            "num_entries": 1,
            "entries": [
                {
                    "key": 60129542159360,
                    "num_nodes": 1,
                    "nodes": [
                        "localhost:8080"
                    ]
                }
            ]
        }
    },
    "allocation_requests": {
        "__timestamp": "2017-08-03T14:13:53.801",
        "__resource_type": "AllocationRequestQueue",
        "resources_sum": {
            "cpu": "0m",
            "memory": "0MB"
        },
        "num_entries": 0,
        "entries": []
    }
}