Kotlin基础知识点 #130:集合操作:flatMap、groupBy、partition

难度:⭐⭐

🎯 问题

处理嵌套集合、按条件分组数据、将集合拆分为两部分等操作在Android开发中很常见。如何用Kotlin优雅地实现这些复杂的集合转换?flatMap、groupBy、partition如何简化这些场景?

💡 核心概念

flatMap: 将集合的每个元素映射为集合,然后扁平化为单个集合(map + flatten)
groupBy: 按指定键将集合分组为Map
partition: 将集合按条件分为两个列表(满足和不满足)

代码示例

示例1:flatMap基础用法

// map vs flatMap对比
/**
 * Prints the results of `map` and `flatMap` side by side so the difference
 * between a nested result and a flattened result is visible.
 */
fun testFlatMapBasic() {
    val source = listOf(1, 2, 3)

    // map with a scalar transform: one output value per input element.
    println(source.map { n -> n * 2 })  // [2, 4, 6]

    // map with a list-producing transform: the result stays nested.
    val nested = source.map { n -> listOf(n, n * 2) }
    println(nested)  // [[1, 2], [2, 4], [3, 6]]

    // flatMap with the same transform: the inner lists are concatenated.
    val flat = source.flatMap { n -> listOf(n, n * 2) }
    println(flat)  // [1, 2, 2, 4, 3, 6]
}

// flatMap = map + flatten
/** Prints whether `flatMap` gives the same list as `map` followed by `flatten`. */
fun testMapFlatten() {
    val source = listOf(1, 2, 3)

    // Two explicit steps: build the nested list, then flatten it.
    val nested = source.map { n -> listOf(n, n * 2) }  // [[1,2], [2,4], [3,6]]
    val twoStep = nested.flatten()                     // [1, 2, 2, 4, 3, 6]

    // The single-call form of the same transformation.
    val oneStep = source.flatMap { n -> listOf(n, n * 2) }

    println(twoStep == oneStep)  // true
}

// Android场景:展开嵌套数据
/** A user together with the list of devices that belong to them. */
data class User(
    val name: String,
    val devices: List<Device>,
)

/** A device with an [id] and a display [name]. */
data class Device(
    val id: String,
    val name: String,
)

/** Helpers that flatten the per-user device lists of [User] objects. */
class UserDeviceManager {
    /**
     * Returns every device of every user as one flat list.
     *
     * The hand-written nested loop is kept only as a contrast to the `flatMap`
     * call; its result is not returned.
     */
    fun getAllDevices(users: List<User>): List<Device> {
        // Manual approach: two nested loops feeding a mutable list.
        val collectedByHand = mutableListOf<Device>()
        for (owner in users) {
            for (owned in owner.devices) {
                collectedByHand += owned
            }
        }

        // flatMap approach: a single call produces the flat list.
        return users.flatMap { owner -> owner.devices }
    }

    /** Returns the name of every device: flatten first, then map to the name. */
    fun getAllDeviceNames(users: List<User>): List<String> =
        users.flatMap { owner -> owner.devices }.map { device -> device.name }

    /** Returns the distinct device ids across all users. */
    fun getUniqueDeviceIds(users: List<User>): Set<String> {
        val ids = users.flatMap { owner -> owner.devices }.map { device -> device.id }
        return ids.toSet()  // converting to a Set drops duplicates
    }
}

// 复杂flatMap:展开并过滤
/** Order queries built on `flatMap` over each order's line items. */
class OrderManager {
    data class Order(val orderId: String, val items: List<OrderItem>)
    data class OrderItem(val productId: String, val quantity: Int, val price: Double)

    /**
     * Returns the distinct product ids of all items priced at [minPrice] or
     * more, in order of first appearance.
     */
    fun getExpensiveProducts(orders: List<Order>, minPrice: Double): List<String> {
        val allItems = orders.flatMap { order -> order.items }          // every line item
        val pricey = allItems.filter { item -> item.price >= minPrice } // keep the expensive ones
        return pricey.map { item -> item.productId }.distinct()         // ids without duplicates
    }

    /** Returns the sum of `price * quantity` over every item of every order. */
    fun calculateTotalRevenue(orders: List<Order>): Double =
        orders.flatMap { order -> order.items }.sumOf { item -> item.price * item.quantity }
}

示例2:groupBy分组操作

// 基础groupBy
/** Groups the numbers 1..6 by parity and prints the resulting map. */
fun testBasicGroupBy() {
    val source = listOf(1, 2, 3, 4, 5, 6)

    // The key is the Boolean "is even".
    val byParity = source.groupBy { n -> n % 2 == 0 }
    println(byParity)
    // {false=[1, 3, 5], true=[2, 4, 6]}

    // Map lookups return a nullable list.
    val odds = byParity[false]   // [1, 3, 5]
    val evens = byParity[true]   // [2, 4, 6]
}

// 按字符串属性分组
/** Groups a list of words by first character and by length, printing both maps. */
fun testGroupByString() {
    val fruits = listOf("apple", "banana", "apricot", "blueberry", "cherry")

    // Key: the first character of each word.
    println(fruits.groupBy { word -> word.first() })
    // {a=[apple, apricot], b=[banana, blueberry], c=[cherry]}

    // Key: the number of characters in each word.
    println(fruits.groupBy { word -> word.length })
    // {5=[apple], 6=[banana, cherry], 7=[apricot], 9=[blueberry]}
}

// Android场景:设备分组
/**
 * Device record used by the grouping and partition examples.
 *
 * @property id identifier of the device
 * @property name display name
 * @property type category string; the examples use "camera", "lock" and "doorbell"
 * @property room name of the room the device is assigned to
 * @property isOnline online flag of the device
 */
data class Device(val id: String, val name: String, val type: String, val room: String, val isOnline: Boolean)

/** Grouping examples that all operate on one fixed list of [Device] records. */
class DeviceGroupManager {
    private val devices = listOf(
        Device("D001", "Camera 1", "camera", "Living Room", true),
        Device("D002", "Camera 2", "camera", "Bedroom", true),
        Device("D003", "Lock", "lock", "Front Door", false),
        Device("D004", "Doorbell", "doorbell", "Front Door", true),
        Device("D005", "Camera 3", "camera", "Kitchen", false),
    )

    /**
     * Returns the devices keyed by type.
     * For the sample list: camera -> D001, D002, D005; lock -> D003; doorbell -> D004.
     */
    fun groupByType(): Map<String, List<Device>> = devices.groupBy { device -> device.type }

    /** Returns the devices keyed by room name. */
    fun groupByRoom(): Map<String, List<Device>> = devices.groupBy { device -> device.room }

    /** Returns the devices keyed by their online flag. */
    fun groupByOnlineStatus(): Map<Boolean, List<Device>> = devices.groupBy { device -> device.isOnline }

    /** Two-level grouping: the outer key is the type, the inner key is the room. */
    fun groupByTypeAndRoom(): Map<String, Map<String, List<Device>>> {
        val perType = devices.groupBy { device -> device.type }
        return perType.mapValues { entry -> entry.value.groupBy { device -> device.room } }
    }

    /**
     * Returns how many devices exist per type.
     * For the sample list: camera=3, lock=1, doorbell=1.
     */
    fun countByType(): Map<String, Int> {
        val perType = devices.groupBy { device -> device.type }
        return perType.mapValues { entry -> entry.value.size }
    }

    /** Returns the device names per type. */
    fun getDeviceNamesByType(): Map<String, List<String>> {
        val perType = devices.groupBy { device -> device.type }
        return perType.mapValues { entry -> entry.value.map { device -> device.name } }
    }
}

// 复杂分组场景
/** Grouping-based summaries over a list of analytics events. */
class AnalyticsManager {
    data class Event(
        val userId: String,
        val eventType: String,
        val timestamp: Long,
        val value: Double
    )

    /**
     * Prints three summaries of [events]: the event count per user, the summed
     * `value` per event type, and the event count per hour bucket.
     */
    fun analyzeEvents(events: List<Event>) {
        // Event count per user id.
        val countPerUser = events
            .groupBy { event -> event.userId }
            .mapValues { (_, group) -> group.size }

        // Sum of `value` per event type.
        val sumPerType = events
            .groupBy { event -> event.eventType }
            .mapValues { (_, group) -> group.sumOf { member -> member.value } }

        // Hour bucket = timestamp / 3,600,000.
        // NOTE(review): this assumes timestamps are in milliseconds — confirm against the data source.
        val countPerHour = events
            .groupBy { event -> event.timestamp / 3600000 }
            .mapValues { (_, group) -> group.size }

        println("用户事件数: $countPerUser")
        println("类型总值: $sumPerType")
        println("每小时事件数: $countPerHour")
    }
}

示例3:partition分区操作

// 基础partition
/** Splits 1..10 into even and odd numbers with one `partition` call and prints both lists. */
fun testBasicPartition() {
    val source = (1..10).toList()

    // The first list holds the elements matching the predicate, the second the rest.
    val (matching, rest) = source.partition { n -> n % 2 == 0 }

    println("偶数: $matching")  // [2, 4, 6, 8, 10]
    println("奇数: $rest")   // [1, 3, 5, 7, 9]
}

// partition vs filter对比
/** Contrasts splitting a list with two `filter` passes against a single `partition` pass. */
fun testPartitionVsFilter() {
    val source = listOf(1, 2, 3, 4, 5, 6)

    // Two filter calls: the list is walked once per call.
    val evensViaFilter = source.filter { n -> n % 2 == 0 }
    val oddsViaFilter = source.filterNot { n -> n % 2 == 0 }

    // partition: a single walk yields both halves.
    val (evensViaPartition, oddsViaPartition) = source.partition { n -> n % 2 == 0 }
}

// Android场景:设备分区
/** Partition examples over a fixed list of [Device] records. */
class DevicePartitionManager {
    private val devices = listOf(
        Device("D001", "Camera 1", "camera", "Living Room", true),
        Device("D002", "Camera 2", "camera", "Bedroom", false),
        Device("D003", "Lock", "lock", "Front Door", true),
        Device("D004", "Doorbell", "doorbell", "Front Door", false),
    )

    /** Splits the devices by online flag, prints both counts and hands each half to its UI hook. */
    fun partitionByOnlineStatus() {
        val (reachable, unreachable) = devices.partition { device -> device.isOnline }

        println("在线设备: ${reachable.size}")
        println("离线设备: ${unreachable.size}")

        updateOnlineUI(reachable)
        updateOfflineUI(unreachable)
    }

    /** Splits the devices into cameras and everything else, then prints one line per device. */
    fun processDevices() {
        val (cameras, nonCameras) = devices.partition { device -> device.type == "camera" }

        // Cameras are reported first, followed by the remaining devices.
        for (camera in cameras) {
            println("配置摄像头: ${camera.name}")
        }
        for (other in nonCameras) {
            println("配置其他设备: ${other.name}")
        }
    }

    /** Applies two partitions in a row and prints the sizes of the three resulting groups. */
    fun complexPartition() {
        // First split: online vs. offline.
        val (reachable, unreachable) = devices.partition { device -> device.isOnline }

        // Second split, on the online half only: cameras vs. other devices.
        val (reachableCameras, reachableOthers) = reachable.partition { device -> device.type == "camera" }

        println("在线摄像头: ${reachableCameras.size}")
        println("在线其他设备: ${reachableOthers.size}")
        println("离线设备: ${unreachable.size}")
    }

    // Empty placeholders for the UI update hooks.
    private fun updateOnlineUI(devices: List<Device>) = Unit
    private fun updateOfflineUI(devices: List<Device>) = Unit
}

// 验证场景:分区验证数据
/** Splits input records into valid and invalid ones and handles each group. */
class DataValidator {
    data class InputData(val id: String, val value: String)

    /**
     * Partitions [data] by validity (a value is valid when its length is
     * between 1 and 100 inclusive), processes every valid record, logs the
     * invalid ones if there are any, and finally prints both counts.
     */
    fun validateAndProcess(data: List<InputData>) {
        val (accepted, rejected) = data.partition { input -> input.value.length in 1..100 }

        for (record in accepted) {
            processValid(record)
        }

        // Skip the error log entirely when nothing was rejected.
        if (rejected.isNotEmpty()) logErrors(rejected)

        println("有效: ${accepted.size}, 无效: ${rejected.size}")
    }

    /** Prints one line for a valid record. */
    private fun processValid(data: InputData) = println("处理有效数据: ${data.id}")

    /** Prints one line per invalid record. */
    private fun logErrors(data: List<InputData>) {
        for (record in data) println("无效数据: ${record.id}")
    }
}

示例4:综合应用

// 场景:社交应用的消息处理
/** Message-processing examples that combine `groupBy`, `partition` and `filter`. */
class MessageProcessor {
    data class Message(
        val id: String,
        val senderId: String,
        val content: String,
        val timestamp: Long,
        val isRead: Boolean,
        val hasAttachment: Boolean
    )

    data class Conversation(
        val userId: String,
        val messages: List<Message>,
        val unreadCount: Int
    )

    /**
     * Builds one [Conversation] per sender.
     *
     * Each conversation lists that sender's messages newest-first, and the
     * conversations themselves are ordered by unread count, highest first.
     *
     * @param messages the messages to group; an empty list yields an empty result
     * @return one conversation per distinct `senderId`
     */
    fun processMessages(messages: List<Message>): List<Conversation> =
        messages
            .groupBy { it.senderId }
            .map { (senderId, sent) ->
                Conversation(
                    userId = senderId,
                    messages = sent.sortedByDescending { it.timestamp },
                    // Only the number of unread messages is needed, so count them
                    // directly instead of partitioning into two lists and
                    // discarding one of them.
                    unreadCount = sent.count { !it.isRead },
                )
            }
            .sortedByDescending { it.unreadCount }

    /** Returns the messages that carry an attachment, grouped by sender id. */
    fun extractAttachments(messages: List<Message>): Map<String, List<Message>> {
        return messages
            .filter { it.hasAttachment }
            .groupBy { it.senderId }
    }

    /**
     * Returns summary counts for [messages].
     *
     * The result has the keys "total", "withAttachment", "withoutAttachment",
     * "read", "unread" and "uniqueUsers"; every value is an `Int`.
     */
    fun analyzeMessages(messages: List<Message>): Map<String, Any> {
        // partition fits here because both halves of each split are reported.
        val (withAttachment, withoutAttachment) = messages.partition { it.hasAttachment }
        val (read, unread) = messages.partition { it.isRead }
        // A set of sender ids is enough to count distinct senders; the full
        // per-sender grouping is not needed.
        val senderIds = messages.mapTo(mutableSetOf()) { it.senderId }

        return mapOf(
            "total" to messages.size,
            "withAttachment" to withAttachment.size,
            "withoutAttachment" to withoutAttachment.size,
            "read" to read.size,
            "unread" to unread.size,
            "uniqueUsers" to senderIds.size
        )
    }
}

// Android场景:完整的设备管理
/** Combines `flatMap`, `groupBy` and `partition` to summarise device details. */
class DeviceManagementViewModel : ViewModel() {
    data class DeviceDetail(val device: Device, val events: List<DeviceEvent>)

    data class DeviceEvent(val type: String, val timestamp: Long, val message: String)

    /**
     * Computes per-room online/offline counts and per-device-type event
     * statistics for [deviceDetails], then passes both to [logAnalysis].
     */
    fun analyzeDeviceData(deviceDetails: List<DeviceDetail>) {
        // Steps 1-3 show each operator on its own; their results are not used further down.
        // 1. flatMap: every event of every device in one list.
        val everyEvent = deviceDetails.flatMap { detail -> detail.events }
        // 2. groupBy: those events keyed by event type.
        val eventsPerKind = everyEvent.groupBy { event -> event.type }
        // 3. partition: online devices vs. offline devices.
        val (reachable, unreachable) = deviceDetails.partition { detail -> detail.device.isOnline }

        // 4. Combined: online / offline / total device counts per room.
        val perRoom = deviceDetails.groupBy { detail -> detail.device.room }
        val roomStats = perRoom.mapValues { (_, inRoom) ->
            val (up, down) = inRoom.partition { detail -> detail.device.isOnline }
            mapOf(
                "online" to up.size,
                "offline" to down.size,
                "total" to inRoom.size
            )
        }

        // 5. Combined: total event count and per-event-type breakdown for each device type.
        val perDeviceType = deviceDetails.groupBy { detail -> detail.device.type }
        val eventStatsByType = perDeviceType.mapValues { (_, ofType) ->
            val typeEvents = ofType.flatMap { detail -> detail.events }
            val breakdown = typeEvents
                .groupBy { event -> event.type }
                .mapValues { (_, sameKind) -> sameKind.size }

            mapOf(
                "totalEvents" to typeEvents.size,
                "eventBreakdown" to breakdown
            )
        }

        logAnalysis(roomStats, eventStatsByType)
    }

    /** Prints both statistics maps. */
    private fun logAnalysis(
        roomStats: Map<String, Map<String, Int>>,
        eventStats: Map<String, Map<String, Any>>
    ) {
        println("房间统计: $roomStats")
        println("事件统计: $eventStats")
    }
}

示例5:性能优化

// Sequence优化组合操作
/** Contrasts eager collection pipelines with their Sequence-based counterparts. */
class OptimizedProcessor {
    /**
     * Runs the same duplicate -> group-by-type -> partition-by-online pipeline
     * twice, once on the List and once through a Sequence. Both results are
     * discarded; the method exists to show the two forms next to each other.
     */
    fun processLargeDataset(devices: List<Device>) {
        // Eager form: flatMap materialises an intermediate List before groupBy runs.
        val eagerGroups = devices
            .flatMap { device -> listOf(device, device) }
            .groupBy { device -> device.type }
        val eagerResult = eagerGroups.mapValues { entry ->
            entry.value.partition { device -> device.isOnline }
        }

        // Sequence form: flatMap is lazy; groupBy is the step that pulls the elements through.
        val lazyGroups = devices.asSequence()
            .flatMap { device -> sequenceOf(device, device) }
            .groupBy { device -> device.type }
        val lazyResult = lazyGroups.mapValues { entry ->
            entry.value.partition { device -> device.isOnline }
        }
    }

    /**
     * Returns the first online camera among all users' devices, or `null` if
     * there is none. The Sequence pipeline stops as soon as a match is found.
     */
    fun findFirstOnlineCamera(users: List<User>): Device? {
        val cameras = users.asSequence()
            .flatMap { user -> user.devices.asSequence() }
            .filter { device -> device.type == "camera" }
        return cameras.firstOrNull { device -> device.isOnline }
    }

    /**
     * Same lookup without a Sequence, for comparison: all devices of all users
     * are flattened and filtered before the first match is picked.
     */
    fun findFirstOnlineCameraNoSeq(users: List<User>): Device? {
        val cameras = users
            .flatMap { user -> user.devices }
            .filter { device -> device.type == "camera" }
        return cameras.firstOrNull { device -> device.isOnline }
    }
}

// 性能测试
/**
 * Times "find the first online device" on an eager List pipeline and on a
 * Sequence pipeline over 1000 users with 100 devices each, then prints both
 * durations in milliseconds.
 */
fun performanceTest() {
    val users = List(1000) { userIndex ->
        val owned = List(100) { deviceIndex ->
            Device("D$deviceIndex", "Device$deviceIndex", "camera", "Room", deviceIndex % 10 == 0)
        }
        User(name = "User$userIndex", devices = owned)
    }

    // Eager pipeline: flatMap and filter both run over every device before firstOrNull.
    val eagerMillis = measureTimeMillis {
        users.flatMap { it.devices }.filter { it.isOnline }.firstOrNull()
    }

    // Sequence pipeline: elements are pulled one at a time until the first match.
    val lazyMillis = measureTimeMillis {
        users.asSequence().flatMap { it.devices }.filter { it.isOnline }.firstOrNull()
    }

    println("List: ${eagerMillis}ms, Sequence: ${lazyMillis}ms")
    // The article quotes "List: 100ms, Sequence: 1ms" as a typical result;
    // actual numbers depend on the machine and JVM warm-up.
}

/**
 * Runs [block] once and returns the elapsed time in whole milliseconds.
 *
 * The measurement uses [System.nanoTime], which is intended for elapsed-time
 * measurement, rather than the wall clock. Wall-clock time can be adjusted
 * while [block] is running, which would distort or even negate the result.
 *
 * @param block the code to time
 * @return elapsed milliseconds, truncated towards zero
 */
fun measureTimeMillis(block: () -> Unit): Long {
    val startNanos = System.nanoTime()
    block()
    return (System.nanoTime() - startNanos) / 1_000_000
}

⚡ 关键要点

1. 何时使用flatMap/groupBy/partition

// flatMap: use it when nested collections need to be flattened
val allDevices = users.flatMap { it.devices }

// groupBy: use it when elements need to be classified by some property
val byType = devices.groupBy { it.type }

// partition: use it when exactly two groups are needed (matching vs. not matching)
val (valid, invalid) = data.partition { it.isValid }

// When more than two groups are needed, use groupBy rather than repeated partition calls
// ✅ Recommended
val byStatus = devices.groupBy { it.status }

// ❌ Not recommended
val (online, temp) = devices.partition { it.status == "online" }
val (offline, unknown) = temp.partition { it.status == "offline" }

2. groupBy vs partition对比

// partition: always splits into exactly two groups
val (group1, group2) = list.partition { condition }

// groupBy: can produce any number of groups
val groups = list.groupBy { key }

// Equivalent conversion
val (evens, odds) = numbers.partition { it % 2 == 0 }
// is equivalent to
val grouped = numbers.groupBy { it % 2 == 0 }
val evens = grouped[true] ?: emptyList()
val odds = grouped[false] ?: emptyList()

3. 性能最佳实践

// Small data sets: use the collection operations directly
val result = smallList.flatMap { }.groupBy { }

// Large data sets: go through a Sequence
val result = largeList.asSequence()
    .flatMap { }
    .groupBy { }

// Only part of the result is needed: Sequence + take
val first10 = hugeList.asSequence()
    .flatMap { it.items }
    .take(10)
    .toList()

// Avoid unnecessary operations
// ❌ Not recommended: groupBy followed by flatten just to get the elements back
//    (the round trip also reorders the elements group by group)
val result1 = items.groupBy { it.type }.values.flatten()
// ✅ Recommended: use the original collection directly
val result2 = items

4. 常见错误

// Mistake 1: forgetting that a key may be absent from the groupBy result
// (groupBy never produces empty groups; a key with no elements is simply missing)
val byType = devices.groupBy { it.type }
// val cameras = byType["camera"]  // ❌ may return null
val cameras = byType["camera"] ?: emptyList()  // ✅

// Mistake 2: returning a single element from the flatMap lambda
// ❌ Wrong: the flatMap lambda must return a collection
// val result = list.flatMap { it.value }
val result = list.flatMap { listOf(it.value) }  // ✅

// Or simply use map
val result2 = list.map { it.value }  // ✅ simpler

// Mistake 3: not destructuring the partition result
val partitioned = list.partition { it.isValid }
val valid = partitioned.first   // ❌ unclear
val invalid = partitioned.second
// ✅ Use destructuring
val (valid2, invalid2) = list.partition { it.isValid }

🔗 相关知识点

  • #129 集合操作:map、filter、reduce - 基础集合操作
  • #131 序列(Sequence)vs 集合 - 性能优化
  • #121 高阶函数 - 函数式编程基础

总结

flatMap、groupBy、partition是处理复杂集合转换的利器。flatMap用于扁平化嵌套结构,groupBy用于分组聚合,partition用于二分数据。这三个函数可以组合使用,处理各种复杂的数据转换场景。在Android开发中,它们广泛应用于消息处理、设备管理、数据分析等场景。对于大数据集,结合Sequence可以显著提升性能。掌握这些高级集合操作,能写出更简洁、更高效的数据处理代码。

Logo

开源鸿蒙跨平台开发社区汇聚开发者与厂商,共建“一次开发,多端部署”的开源生态,致力于降低跨端开发门槛,推动万物智联创新。

更多推荐