Skip to content

Commit

Permalink
feat: add user agent parser to enrich event data with browser details
Browse files Browse the repository at this point in the history
Resolves #6 by processing incoming JSON data to extract the user agent string from the
`browser.agent` field and using YAUAA to parse it. Enriches the JSON node with detailed
browser, device, and OS information.

Changes:

- Added YAUAA library for user agent parsing.
- Integrated Kotest for testing and wrote unit tests for deserialization.

Co-authored-by: Gaëtan Muller <m.gaetan89@gmail.com>
  • Loading branch information
jboix and MGaetan89 committed Oct 29, 2024
1 parent 95a55fa commit d733f7a
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 13 deletions.
5 changes: 4 additions & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ repositories {
// Module dependencies: `implementation` entries first, then the test-only configurations.
dependencies {
implementation("org.springframework.boot:spring-boot-starter-aop")
implementation("org.springframework.boot:spring-boot-starter-actuator")
// NOTE(review): the same starter is listed with versions 1.5.2 and 1.5.3 on consecutive lines.
// This looks like the removed and added lines of a diff rendered together — confirm that only
// one of them is present in the actual build file.
implementation("org.opensearch.client:spring-data-opensearch-starter:1.5.2")
implementation("org.opensearch.client:spring-data-opensearch-starter:1.5.3")
implementation("org.springframework.boot:spring-boot-starter-webflux")
implementation("com.fasterxml.jackson.module:jackson-module-kotlin")
implementation("org.jetbrains.kotlin:kotlin-reflect")
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core")
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-reactor")
implementation("com.github.ben-manes.caffeine:caffeine")
// YAUAA user agent parser (provides the `nl.basjes.parse.useragent` package).
implementation("nl.basjes.parse.useragent:yauaa:7.28.1")
// Kotest JUnit 5 runner; its Spring extension is declared a few lines below.
testImplementation("io.kotest:kotest-runner-junit5:5.9.1")
testImplementation("org.springframework.boot:spring-boot-starter-test")
testImplementation("io.kotest.extensions:kotest-extensions-spring:1.3.0")
testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
testRuntimeOnly("org.junit.platform:junit-platform-launcher")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.core.JsonParser
import com.fasterxml.jackson.databind.DeserializationContext
import com.fasterxml.jackson.databind.JsonDeserializer
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.node.ObjectNode
import nl.basjes.parse.useragent.UserAgent
import nl.basjes.parse.useragent.UserAgentAnalyzer
import org.springframework.data.annotation.Id
import org.springframework.data.elasticsearch.annotations.DateFormat
import org.springframework.data.elasticsearch.annotations.Document
Expand Down Expand Up @@ -32,6 +40,77 @@ data class EventRequest(
@Field(type = FieldType.Date, format = [DateFormat.epoch_millis], name = "@timestamp")
var timestamp: Long,
var version: Long,
@JsonDeserialize(using = DataDeserializer::class)
var data: Any? = null,
var session: Any? = null,
)

/**
 * Custom deserializer for the 'data' field in [EventRequest].
 *
 * This deserializer reads the incoming JSON tree, extracts the user agent string from the
 * `browser.agent` field and enriches the tree with information about the browser, device and
 * operating system:
 *
 * - `browser.name` / `browser.version` are written into the existing `browser` object;
 * - `device` and `os` are replaced by new objects holding a `name` and a `version`.
 *
 * The tree is returned unmodified when `data` is not a JSON object, when `browser` is missing or
 * is not a JSON object, or when `browser.agent` is missing, is not a JSON string (this includes
 * an explicit JSON `null`), or is blank.
 */
private class DataDeserializer : JsonDeserializer<JsonNode?>() {
    /**
     * Reads the value at the parser's current position as a tree and enriches it.
     *
     * @param parser The parser positioned on the `data` value.
     * @param ctxt The deserialization context, used to create the new object nodes.
     * @return The tree that was read, enriched in place when a usable user agent was found.
     */
    override fun deserialize(
        parser: JsonParser,
        ctxt: DeserializationContext,
    ): JsonNode {
        val node: JsonNode = parser.codec.readTree(parser)
        val dataNode = node as? ObjectNode ?: return node
        val browserNode = dataNode.get("browser") as? ObjectNode ?: return node

        // `textValue()` is used on purpose instead of `asText()`: for an explicit JSON null,
        // `asText()` yields the literal string "null", which would then be parsed as if it were
        // a real user agent. `textValue()` returns null for every non-string node.
        val userAgent =
            browserNode
                .get("agent")
                ?.textValue()
                ?.takeIf { it.isNotBlank() }
                ?.let(userAgentAnalyzer::parse)
                ?: return node

        // `browserNode` is already the child stored under "browser", so it is updated in place.
        browserNode.put("name", userAgent.getValueOrNull("AgentName"))
        browserNode.put("version", userAgent.getValueOrNull("AgentVersion"))

        dataNode.set<ObjectNode>(
            "device",
            nameAndVersionNode(ctxt, userAgent, "DeviceName", "DeviceVersion"),
        )
        dataNode.set<ObjectNode>(
            "os",
            nameAndVersionNode(ctxt, userAgent, "OperatingSystemName", "OperatingSystemVersion"),
        )

        return node
    }

    /** Builds an object node with the `name` and `version` entries read from [userAgent]. */
    private fun nameAndVersionNode(
        ctxt: DeserializationContext,
        userAgent: UserAgent,
        nameField: String,
        versionField: String,
    ): ObjectNode =
        ctxt.nodeFactory.objectNode().apply {
            put("name", userAgent.getValueOrNull(nameField))
            put("version", userAgent.getValueOrNull(versionField))
        }

    companion object {
        // Built once and shared by every deserializer instance.
        private val userAgentAnalyzer =
            UserAgentAnalyzer
                .newBuilder()
                .hideMatcherLoadStats()
                .withCache(10000)
                .build()
    }
}

/**
 * Looks up [fieldName] on this [UserAgent], mapping the `"??"` placeholder to `null`.
 *
 * @param fieldName The name of the field to retrieve.
 * @return The value of the field, or `null` when that value is exactly `"??"`.
 */
private fun UserAgent.getValueOrNull(fieldName: String): String? = getValue(fieldName).takeUnless { it == "??" }

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package ch.srgssr.pillarbox.monitoring

import io.kotest.core.config.AbstractProjectConfig
import io.kotest.extensions.spring.SpringExtension

/**
 * Kotest project-level configuration for this module's tests.
 *
 * It registers [SpringExtension] as the only project extension.
 */
class TestProjectConfig : AbstractProjectConfig() {
// Extensions registered for the whole test project.
override fun extensions() = listOf(SpringExtension)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package ch.srgssr.pillarbox.monitoring.event.model

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.module.kotlin.readValue
import io.kotest.core.spec.style.ShouldSpec
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import org.springframework.boot.test.context.SpringBootTest

/**
 * Checks how the `data` field of an [EventRequest] is deserialized, with and without a
 * `browser.agent` entry in the payload.
 */
@SpringBootTest
class EventRequestTest(
    private val objectMapper: ObjectMapper,
) : ShouldSpec({
    // Null-tolerant lookups, so every assertion below can be written on a single line.
    fun ObjectNode?.child(field: String): ObjectNode? = this?.get(field) as? ObjectNode

    fun ObjectNode?.text(field: String): String? = this?.get(field)?.asText()

    should("deserialize an event and resolve user agent") {
        // Given: a payload whose data carries a browser user agent
        val payload =
            """
            {
            "session_id": "12345",
            "event_name": "START",
            "timestamp": 1630000000000,
            "version": 1,
            "data": {
            "browser": {
            "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
            }
            }
            }
            """.trimIndent()

        // When: the payload is deserialized
        val event: EventRequest = objectMapper.readValue(payload)

        // Then: browser, device and os details are derived from the user agent
        val data = event.data as? ObjectNode
        data shouldNotBe null

        val browser = data.child("browser")
        browser shouldNotBe null
        browser.text("name") shouldBe "Chrome"
        browser.text("version") shouldBe "129"

        val device = data.child("device")
        device shouldNotBe null
        device.text("name") shouldBe "Apple Macintosh"

        val os = data.child("os")
        os shouldNotBe null
        os.text("name") shouldBe "Mac OS"
        os.text("version") shouldBe ">=10.15.7"
    }

    should("retain existing data when deserializing an event without user agent") {
        // Given: a payload whose browser object has no agent entry
        val payload =
            """
            {
            "session_id": "12345",
            "event_name": "START",
            "timestamp": 1630000000000,
            "version": 1,
            "data": {
            "browser": {
            "name": "Firefox",
            "version": "2.0"
            }
            }
            }
            """.trimIndent()

        // When: the payload is deserialized
        val event: EventRequest = objectMapper.readValue(payload)

        // Then: the browser values are kept and no device or os entry is added
        val data = event.data as? ObjectNode
        data shouldNotBe null

        val browser = data.child("browser")
        browser shouldNotBe null
        browser.text("name") shouldBe "Firefox"
        browser.text("version") shouldBe "2.0"

        data?.get("device") shouldBe null
        data?.get("os") shouldBe null
    }
})

0 comments on commit d733f7a

Please sign in to comment.