Person with phone standing in the road in a hexagon with icons of phone, cloud, and a pinpoint above their head

One of the most popular and widely adopted business tool suites is Microsoft 365 (previously known as Microsoft Office 365) or just M365 for short. M365 offers a comprehensive array of applications and services, including email, document collaboration, video conferencing, and more, all hosted on the cloud. M365 provides numerous benefits in terms of flexibility and accessibility, but it also introduces new security, performance, and compliance challenges. This is why monitoring M365 applications is critically important for organizations of all sizes. Monitoring M365 applications is essential in helping businesses maintain a secure, efficient, and compliant digital environment.

In this blog, we’ll look into various parts of the M365 ecosystem. We begin our monitoring journey by gathering information about the health status of the services used within your subscription. We’ll then dig a little deeper into real-time user login monitoring.

Health information for the services your tenant subscribes to

In M365 adding agents for monitoring isn't possible. Microsoft offers M365 as a software as a service (SaaS); hence, you don’t have access to the underlying infrastructure and cannot install any language or infrastructure agents. To collect health metrics, we have to rely on native Microsoft tools and APIs by utilizing features like the Health Dashboard, auditing, reporting, and the Microsoft Graph API to track and analyze M365 performance, security, and user activity. This ensures a reliable digital workplace without third-party monitoring agents.

As part of the Microsoft Graph REST APIs, we’re able to gather health information about the services a tenant subscribes to.

An easy way to gather this information on a continuous basis is to run a New Relic synthetic scripted API monitor.

Follow these steps to get started:

  1. Create a new synthetic monitor and then select Endpoint availability (Scripted API) as the monitor type.
  2. Select your account and provide a name for the monitor; for example, Microsoft 365 - service overview. Set Period to how often you want to collect the service health data.
  3. Click Select locations and select a single location (in our case, running the API call from multiple locations doesn’t provide any additional information).

  4. Continue to the Write script section. Here we’ll write the API test for the service health overview. Copy and paste the below script content into the Script editor.

  5. Click Save monitor.

/**
 * Feel free to explore, or check out the full documentation
 * https://docs.newrelic.com/docs/synthetics/new-relic-synthetics/scripting-monitors/writing-api-tests
 * for details.
 */

var assert = require('assert');
const request = require("request");
const { promisify } = require("util");
const promisifyRequestGet = promisify(request.get);
const promisifyRequestPost = promisify(request.post);

/**
 * VARIABLE DEFINITIONS
 */
// Values read from the synthetics secure credential store ($secure).
// Only credentials that the script actually uses are read here.
const NEW_RELIC_ACCOUNT_ID = $secure.NEW_RELIC_ACCOUNT_ID;
const NEW_RELIC_INSIGHTS_INSERT_KEY = $secure.NEW_RELIC_INSIGHTS_INSERT_KEY;
const MSFT_TENANT_ID = $secure.MSFT_TENANT_ID;
const MSFT_CLIENT_ID = $secure.MSFT_CLIENT_ID;
const MSFT_CLIENT_SECRET = $secure.MSFT_CLIENT_SECRET;
const MSFT_USERNAME = $secure.MSFT_USERNAME;
const MSFT_USER_PASSWORD = $secure.MSFT_USER_PASSWORD;
// Event type assigned to every record sent to New Relic by this script.
const NEW_RELIC_EVENT_TYPE = 'M365ServiceOverview';

/**
 * Sends an already-serialized payload to the New Relic Events API endpoint
 * of the account identified by NEW_RELIC_ACCOUNT_ID.
 * @param {*} body - request body, forwarded unchanged
 * @returns {Promise<request.Response>} the response of the POST request
 */
async function insertInsightsEvent(body) {
    const endpoint = `https://insights-collector.newrelic.com/v1/accounts/${NEW_RELIC_ACCOUNT_ID}/events`;
    const headers = {
        'X-Insert-Key': NEW_RELIC_INSIGHTS_INSERT_KEY,
        'Content-Type': 'application/json'
    };
    const response = await promisifyRequestPost({ uri: endpoint, body, headers });
    return response;
}

/**
 * Serializes the given events as JSON and posts them through
 * insertInsightsEvent(), then logs the outcome: a success line when the
 * response status is 200, otherwise the status code that came back.
 * @param events - value to serialize and send
 * @returns {Promise<void>}
 */
async function recordData(events) {
    const payload = JSON.stringify(events, null, 2);
    const { statusCode } = await insertInsightsEvent(payload);
    if (statusCode === 200) {
        console.log('Script executed successfully');
        return;
    }
    console.log("insertInsightsEvent() non-200 return code: " + statusCode);
}

// Form fields for the token request (grant_type=password).
// Every value is percent-encoded so that credentials containing characters
// such as '&', '=', '+' or '%' cannot corrupt the form-urlencoded body.
var urlAuthBody = [
    ['client_id', MSFT_CLIENT_ID],
    ['client_secret', MSFT_CLIENT_SECRET],
    ['response_type', 'token'],
    // Space-separated scopes; encoding turns the spaces into %20.
    ['scope', 'ServiceHealth.Read.All user.read openid profile offline_access'],
    ['username', MSFT_USERNAME],
    ['password', MSFT_USER_PASSWORD],
    ['grant_type', 'password']
].map(function (field) {
    return field[0] + '=' + encodeURIComponent(field[1]);
}).join('&');

// Request options for the token call: the encoded form body plus its content type.
var options = {
    body: urlAuthBody,
    headers: {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
};

// Tenant-specific OAuth 2.0 v2.0 token endpoint.
var urlAuth = 'https://login.microsoftonline.com/' + MSFT_TENANT_ID + '/oauth2/v2.0/token?';

// Flow of this statement:
//   1. POST the credentials to the token endpoint and read access_token from the JSON response.
//   2. GET the service health overviews from Microsoft Graph using that token.
//   3. Tag each returned item and hand the list to recordData().
$http.post(urlAuth, options,
    // Callback for the token request
    function (err, response, body) {
        // Fail with the real transport error instead of a TypeError on `response.statusCode`
        // when the request itself did not complete.
        assert.ifError(err);
        assert.equal(response.statusCode, 200, 'Expected a 200 OK response');

        var tokenResponse = JSON.parse(response.body);

        var graphOptions = {
            headers: {
                'Authorization': 'Bearer ' + tokenResponse.access_token,
                'Host': 'graph.microsoft.com'
            }
        };

        $http.get('https://graph.microsoft.com/v1.0/admin/serviceAnnouncement/healthOverviews', graphOptions,
            // Callback for the health overview request
            function (err, response, body) {
                assert.ifError(err);
                assert.equal(response.statusCode, 200, 'Expected a 200 OK response');

                var healthOverviews = JSON.parse(response.body).value;

                healthOverviews.forEach((item) => {
                    item.eventType = NEW_RELIC_EVENT_TYPE;
                    item.location = $env.LOCATION;
                    // Numeric companion to `status`: 1 = degraded, 0 = operational.
                    // Any other status value leaves statusVal unset.
                    if (item.status === "serviceDegradation") {
                        item.statusVal = 1;
                    }
                    else if (item.status === "serviceOperational") {
                        item.statusVal = 0;
                    }
                });

                // Log why sending failed, then rethrow so the rejection is not swallowed.
                recordData(healthOverviews).catch(function (recordErr) {
                    console.log('recordData() failed: ' + recordErr);
                    throw recordErr;
                });
            }
        );
    }
);

You can also find the latest version of the script in my GitHub repository that accompanies this blog series.

As you can see in the code, it leverages some secure credentials that are being stored in New Relic. Create each of these secure credentials in your New Relic synthetics environment. The credentials are as follows:

Secure Credential

Description

NEW_RELIC_ACCOUNT_ID

Your New Relic account ID used to store the custom events from the synthetic check

NEW_RELIC_INSIGHTS_INSERT_KEY

Your Insights insert key for your New Relic account

MSFT_TENANT_ID

Microsoft 365 tenant ID

MSFT_CLIENT_ID

Azure AD app registration client ID

MSFT_CLIENT_SECRET

Azure AD app registration client secret

MSFT_USERNAME

Azure AD username of the user on whose behalf this check is executed

MSFT_USER_PASSWORD

Azure AD password of the user on whose behalf this check is executed

The logic of the script can roughly be broken down into three pieces:

  1. Retrieving an access token for the user and the “virtual application” by making a request to https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token.
  2. Leveraging the retrieved token from step 1 and making a call to Microsoft Graph API (that is, https://graph.microsoft.com/v1.0/admin/serviceAnnouncement/healthOverviews) to retrieve the service health information.
  3. Sending the gathered information as a custom event to the New Relic platform by calling recordData(events), which in turn then posts the data with insertInsightsEvent(body).

Now that we have the health data for all of our M365 services within our subscription reported in New Relic, it’s time for us to visualize the data. The following screenshot shows a sample representation of a dashboard that can be used for getting an up-to-date overview of the service health information. The JSON representation of the dashboard that you can use to import into your New Relic account is available in my GitHub repository here.

As you can see, we can easily identify the number of services reporting some kind of degradation in the pie chart on the top left. The right-hand side shows more detail about which services are impacted.

Note: The dashboard above uses a custom visualization called Status Table Widget that’s part of the Status Widget Pack. This custom visualization is provided by New Relic and available in the Apps section under Custom Visualization in your New Relic account.

The above-mentioned data is stored under the event type that you configure in the API script by defining the NEW_RELIC_EVENT_TYPE variable. All the raw data is available as usual in the Query Your Data part of the New Relic platform.

User login monitoring

A typical request from customers that I often hear is: “I want to be able to understand how the user experience looks for my internal users.” The information that I’m typically interested in includes the following:

  • Availability of the M365 websites (for example, login.microsoftonline.com).
  • Latency of the connection to M365 websites.
  • A report that shows the latency and the availability over time (for example, the last 30 days).

A pretty straightforward way to achieve this is by leveraging a New Relic synthetic scripted browser monitor.

Use the steps below to create and configure the synthetic scripted browser:

  1. Create a new synthetic monitor and select User flow / functionality (Scripted Browser) as the monitor type.
  2. Select your account and provide a name for the monitor; for example, Microsoft 365 - user login. Set Period to how often you want to run the user login checks.
  3. Click Select locations and select all the locations that are relevant for your business. These locations should be close to the end users who typically use M365 on a daily basis.

  4. Continue to the Write script section. Here we’ll write the scripted test for the user login workflow. Copy and paste the below script content into the Script editor.

  5. Click Save monitor.

/**
 * Feel free to explore, or check out the full documentation
 * https://docs.newrelic.com/docs/synthetics/new-relic-synthetics/scripting-monitors/writing-scripted-browsers
 * for details.
 */

var assert = require('assert');
// Timeout passed to every $browser.waitForAndFindElement() call in this script
// (presumably milliseconds — confirm against the synthetics scripted browser reference).
var DefaultTimeout = 30000;

// The logger implementation is omitted here for readability; the declaration below is a
// placeholder and not runnable as written. The script uses logger.log(step, message, testCase),
// logger.endTestCase(testCase) and logger.error(err, testCase).
// Full code available at https://github.com/harrykimpel/newrelic-microsoft-observability/blob/main/synthetics/user-login/user-login.js
const logger = function (...);

// Scripted login flow for the M365 portal: open the portal, submit username and
// password from secure credentials, click through the remaining prompts and wait
// for the home page content. Each step is logged through `logger` before it runs.
$webDriver.getCapabilities().then(function () { })
  // Test Case: M365
  .then(function () {
    // Waits for the element, then types into it. The sendKeys() promise is returned
    // so the next step only starts once typing has finished.
    const typeInto = function (locator, text) {
      return $browser.waitForAndFindElement(locator, DefaultTimeout).then(el => el.sendKeys(text));
    };
    // Waits for the element, then clicks it. The click() promise is returned
    // so the next step only starts once the click has been performed.
    const clickOn = function (locator) {
      return $browser.waitForAndFindElement(locator, DefaultTimeout).then(el => el.click());
    };

    return Promise.resolve(true)
      .then(function () {
        logger.log(1, "Open URL https://portal.office.com", "M365");
        return $webDriver.get("https://portal.office.com");
      })
      .then(function () {
        logger.log(2, "Type username", "M365");
        return typeInto($selenium.By.id("i0116"), $secure.MSFT_USERNAME);
      })
      .then(function () {
        logger.log(3, "Click By.id(\"idSIButton9\")", "M365");
        return clickOn($selenium.By.id("idSIButton9"));
      })
      .then(function () {
        logger.log(4, "Sleep a little", "M365");
        return $browser.sleep(1000);
      })
      .then(function () {
        logger.log(5, "Type password", "M365");
        return typeInto($selenium.By.id("i0118"), $secure.MSFT_USER_PASSWORD);
      })
      .then(function () {
        logger.log(6, "Click By.id(\"idSIButton9\")", "M365");
        return clickOn($selenium.By.id("idSIButton9"));
      })
      .then(function () {
        logger.log(7, "Click By.css(\"span\")", "M365");
        return clickOn($selenium.By.css("span"));
      })
      .then(function () {
        logger.log(8, "Click By.id(\"idBtn_Back\")", "M365");
        return clickOn($selenium.By.id("idBtn_Back"));
      })
      .then(function () {
        logger.log(9, "Click By.id(\"officeHome__content\")", "M365");
        return clickOn($selenium.By.id("officeHome__content"));
      })
      .then(function () {
        logger.endTestCase("M365");
      }, function (err) {
        // Record the failure for this test case, then rethrow the original error.
        logger.error(err, "M365");
        throw (err);
      });

  });

You can also find the latest version of the script in my GitHub repository that accompanies this blog series.

As you can see in the code, it leverages some secure credentials that are being stored in New Relic. Go ahead and create each of these secure credentials in your New Relic synthetics environment. The credentials are as follows:

Secure Credential

Description

MSFT_USERNAME

Azure AD username of the user on whose behalf this check is executed

MSFT_USER_PASSWORD

Azure AD password of the user on whose behalf this check is executed

The logic of the script can roughly be broken down into three pieces:

  1. Access https://portal.office.com.
  2. Enter username and password for a test user from secure credentials.
  3. Log in the user and enter the M365 portal home page.

The script will now continuously execute based on the specified period for the locations selected. If all of the steps are successful, then the status of the overall script execution will be successful. In case of any errors or timeouts, the script will report a script failure. All of the data is available in the New Relic synthetics UI.