Kotlin基础知识点 #130:集合操作:flatMap、groupBy、partition
flatMap、groupBy、partition是处理复杂集合转换的利器。flatMap用于扁平化嵌套结构,groupBy用于分组聚合,partition用于二分数据。这三个函数可以组合使用,处理各种复杂的数据转换场景。在Android开发中,它们广泛应用于消息处理、设备管理、数据分析等场景。对于大数据集,结合Sequence可以显著提升性能。掌握这些高级集合操作,能写出更简洁、更高效的数据处理代码。
·
Kotlin基础知识点 #130:集合操作:flatMap、groupBy、partition
难度:⭐⭐
🎯 问题
处理嵌套集合、按条件分组数据、将集合拆分为两部分等操作在Android开发中很常见。如何用Kotlin优雅地实现这些复杂的集合转换?flatMap、groupBy、partition如何简化这些场景?
💡 核心概念
flatMap: 将集合的每个元素映射为集合,然后扁平化为单个集合(map + flatten)
groupBy: 按指定键将集合分组为Map
partition: 将集合按条件分为两个列表(满足和不满足)
代码示例
示例1:flatMap基础用法
// map vs flatMap对比
/**
 * Contrasts `map` with `flatMap` on a three-element list and prints each result.
 */
fun testFlatMapBasic() {
    val source = listOf(1, 2, 3)

    // map with a scalar lambda: exactly one output value per input element.
    println(source.map { n -> n * 2 }) // [2, 4, 6]

    // map with a list-producing lambda: the result stays nested.
    val nested: List<List<Int>> = source.map { n -> listOf(n, n * 2) }
    println(nested) // [[1, 2], [2, 4], [3, 6]]

    // flatMap with the same lambda: the inner lists are concatenated into one list.
    val flat: List<Int> = source.flatMap { n -> listOf(n, n * 2) }
    println(flat) // [1, 2, 2, 4, 3, 6]
}
// flatMap = map + flatten
/**
 * Shows that `flatMap` produces the same list as `map` followed by `flatten`,
 * and prints whether the two results are equal.
 */
fun testMapFlatten() {
    // Shared transform so both pipelines apply exactly the same mapping.
    fun expand(n: Int): List<Int> = listOf(n, n * 2)

    val source = listOf(1, 2, 3)

    // Two explicit steps: [[1, 2], [2, 4], [3, 6]] -> [1, 2, 2, 4, 3, 6]
    val twoStep = source.map { expand(it) }.flatten()

    // Single step with flatMap.
    val oneStep = source.flatMap { expand(it) }

    println(twoStep == oneStep) // true
}
// Android场景:展开嵌套数据
/**
 * A user together with the list of devices attached to them.
 *
 * @property name the user's name
 * @property devices the devices nested under this user
 */
data class User(
    val name: String,
    val devices: List<Device>,
)

/**
 * A device as used by the nested-data example.
 *
 * @property id the device identifier
 * @property name the device name
 */
data class Device(
    val id: String,
    val name: String,
)
/**
 * Helpers that flatten the per-user device lists of [User] into flat views.
 */
class UserDeviceManager {
    /**
     * Returns every device of every user, in user order and then device order.
     *
     * `flatMap` replaces the hand-written nested loop:
     * ```
     * val out = mutableListOf<Device>()
     * for (user in users) {
     *     for (device in user.devices) out.add(device)
     * }
     * ```
     * The loop is shown for comparison only; it is no longer executed, so the
     * function does not build a second list that is immediately discarded.
     */
    fun getAllDevices(users: List<User>): List<Device> =
        users.flatMap { it.devices }

    /**
     * Returns the names of all devices of all users (duplicates are kept).
     */
    fun getAllDeviceNames(users: List<User>): List<String> =
        users
            .flatMap { it.devices } // flatten
            .map { it.name }        // then transform

    /**
     * Returns the distinct device ids across all users.
     */
    fun getUniqueDeviceIds(users: List<User>): Set<String> =
        users
            .flatMap { it.devices }
            .map { it.id }
            .toSet() // de-duplicate
}
// 复杂flatMap:展开并过滤
/**
 * Order-related queries built on `flatMap`: every query first flattens the
 * orders into a single list of order items.
 */
class OrderManager {
    /** An order and the items it contains. */
    data class Order(
        val orderId: String,
        val items: List<OrderItem>,
    )

    /** One line of an order: a product, how many units, and the unit price. */
    data class OrderItem(
        val productId: String,
        val quantity: Int,
        val price: Double,
    )

    /**
     * Returns the distinct product ids of all items whose price is at least
     * [minPrice], in order of first appearance.
     */
    fun getExpensiveProducts(orders: List<Order>, minPrice: Double): List<String> {
        val allItems = orders.flatMap { order -> order.items }
        val qualifying = allItems.filter { item -> item.price >= minPrice }
        val productIds = qualifying.map { item -> item.productId }
        return productIds.distinct()
    }

    /**
     * Returns the sum of `price * quantity` over every item of every order.
     */
    fun calculateTotalRevenue(orders: List<Order>): Double {
        val allItems = orders.flatMap { order -> order.items }
        return allItems.sumOf { item -> item.price * item.quantity }
    }
}
示例2:groupBy分组操作
// 基础groupBy
/**
 * Groups the integers 1..6 by parity and prints the resulting map.
 */
fun testBasicGroupBy() {
    // The key is the Boolean "is even"; groups appear in first-seen key order.
    val byParity: Map<Boolean, List<Int>> =
        listOf(1, 2, 3, 4, 5, 6).groupBy { n -> n % 2 == 0 }
    println(byParity) // {false=[1, 3, 5], true=[2, 4, 6]}

    // Map lookups are nullable because a key may have no group.
    val evens = byParity[true]  // [2, 4, 6]
    val odds = byParity[false]  // [1, 3, 5]
}
// 按字符串属性分组
/**
 * Groups a list of words first by initial letter and then by length,
 * printing both maps.
 */
fun testGroupByString() {
    val fruits = listOf("apple", "banana", "apricot", "blueberry", "cherry")

    // Key: first character.
    // {a=[apple, apricot], b=[banana, blueberry], c=[cherry]}
    println(fruits.groupBy { word -> word.first() })

    // Key: word length.
    // {5=[apple], 6=[banana, cherry], 7=[apricot], 9=[blueberry]}
    println(fruits.groupBy { word -> word.length })
}
// Android场景:设备分组
/**
 * Device record used by the grouping and partitioning examples.
 *
 * @property id device identifier (the examples use values such as "D001")
 * @property name device name
 * @property type device category string; the examples use "camera", "lock" and "doorbell"
 * @property room name of the room the device is assigned to
 * @property isOnline whether the device is currently online
 */
data class Device(
val id: String,
val name: String,
val type: String,
val room: String,
val isOnline: Boolean
)
/**
 * Grouping queries over a fixed in-memory list of [Device]s.
 *
 * All returned maps keep their keys in the order in which each key is first
 * encountered in [devices].
 */
class DeviceGroupManager {
    private val devices = listOf(
        Device("D001", "Camera 1", "camera", "Living Room", true),
        Device("D002", "Camera 2", "camera", "Bedroom", true),
        Device("D003", "Lock", "lock", "Front Door", false),
        Device("D004", "Doorbell", "doorbell", "Front Door", true),
        Device("D005", "Camera 3", "camera", "Kitchen", false),
    )

    /**
     * Groups devices by type.
     *
     * With the sample data (ids shown for brevity):
     * `{camera=[D001, D002, D005], lock=[D003], doorbell=[D004]}`
     */
    fun groupByType(): Map<String, List<Device>> = devices.groupBy { it.type }

    /** Groups devices by room. */
    fun groupByRoom(): Map<String, List<Device>> = devices.groupBy { it.room }

    /** Groups devices by online state (`true` = online). */
    fun groupByOnlineStatus(): Map<Boolean, List<Device>> = devices.groupBy { it.isOnline }

    /** Two-level grouping: first by type, then by room within each type. */
    fun groupByTypeAndRoom(): Map<String, Map<String, List<Device>>> =
        devices
            .groupBy { it.type }
            .mapValues { (_, sameType) -> sameType.groupBy { it.room } }

    /**
     * Counts devices per type.
     *
     * Uses `groupingBy().eachCount()` so the per-type device lists are never
     * materialised just to read their size.
     * With the sample data: `{camera=3, lock=1, doorbell=1}`
     */
    fun countByType(): Map<String, Int> =
        devices.groupingBy { it.type }.eachCount()

    /**
     * Returns the device names per type.
     *
     * Uses the `groupBy(keySelector, valueTransform)` overload, which maps each
     * device to its name while grouping instead of building a map of devices
     * and then re-mapping every value list.
     */
    fun getDeviceNamesByType(): Map<String, List<String>> =
        devices.groupBy(keySelector = { it.type }, valueTransform = { it.name })
}
// 复杂分组场景
/**
 * Groups analytics events along several dimensions and prints a summary.
 */
class AnalyticsManager {
    /** A single analytics event. */
    data class Event(
        val userId: String,
        val eventType: String,
        val timestamp: Long,
        val value: Double,
    )

    /**
     * Prints three summaries of [events]: event count per user, summed
     * `value` per event type, and event count per hour bucket
     * (`timestamp / 3600000`).
     */
    fun analyzeEvents(events: List<Event>) {
        // Event count per user id.
        val eventCountByUser = events
            .groupBy { event -> event.userId }
            .mapValues { entry -> entry.value.size }

        // Sum of values per event type.
        val totalByType = events
            .groupBy { event -> event.eventType }
            .mapValues { (_, sameType) -> sameType.sumOf { event -> event.value } }

        // Event count per hour bucket; 3600000 is the divisor used to turn the timestamp into hours.
        val eventCountByHour = events
            .groupBy { event -> event.timestamp / 3600000 }
            .mapValues { entry -> entry.value.size }

        println("用户事件数: $eventCountByUser")
        println("类型总值: $totalByType")
        println("每小时事件数: $eventCountByHour")
    }
}
示例3:partition分区操作
// 基础partition
/**
 * Splits the integers 1..10 into even and odd numbers with `partition`
 * and prints both halves.
 */
fun testBasicPartition() {
    val oneToTen = (1..10).toList()

    // first = elements matching the predicate, second = the rest.
    val halves = oneToTen.partition { n -> n % 2 == 0 }

    println("偶数: ${halves.first}")  // [2, 4, 6, 8, 10]
    println("奇数: ${halves.second}") // [1, 3, 5, 7, 9]
}
// partition vs filter对比
/**
 * Side-by-side illustration of obtaining two complementary lists with two
 * `filter` calls versus one `partition` call. Nothing is printed or returned.
 */
fun testPartitionVsFilter() {
    val source = listOf(1, 2, 3, 4, 5, 6)
    val isEven: (Int) -> Boolean = { n -> n % 2 == 0 }

    // ❌ Two filter calls: the list is traversed twice.
    val evensViaFilter = source.filter { isEven(it) }
    val oddsViaFilter = source.filter { !isEven(it) }

    // ✅ One partition call: a single traversal yields both lists.
    val (evensViaPartition, oddsViaPartition) = source.partition { isEven(it) }
}
// Android场景:设备分区
/**
 * Partitioning examples over a fixed in-memory list of [Device]s.
 */
class DevicePartitionManager {
    private val devices = listOf(
        Device("D001", "Camera 1", "camera", "Living Room", true),
        Device("D002", "Camera 2", "camera", "Bedroom", false),
        Device("D003", "Lock", "lock", "Front Door", true),
        Device("D004", "Doorbell", "doorbell", "Front Door", false),
    )

    /**
     * Splits the devices into online and offline, prints both counts and
     * passes each half to the matching UI hook.
     */
    fun partitionByOnlineStatus() {
        val byStatus = devices.partition { device -> device.isOnline }
        val onlineDevices = byStatus.first
        val offlineDevices = byStatus.second

        println("在线设备: ${onlineDevices.size}")
        println("离线设备: ${offlineDevices.size}")

        updateOnlineUI(onlineDevices)
        updateOfflineUI(offlineDevices)
    }

    /**
     * Splits the devices into cameras and everything else, then prints one
     * line per device, cameras first.
     */
    fun processDevices() {
        val (cameras, nonCameras) = devices.partition { device -> device.type == "camera" }

        for (camera in cameras) {
            println("配置摄像头: ${camera.name}")
        }
        for (other in nonCameras) {
            println("配置其他设备: ${other.name}")
        }
    }

    /**
     * Two chained partitions: online vs. offline first, then the online
     * devices into cameras vs. others. Prints the three resulting counts.
     */
    fun complexPartition() {
        // First split: online vs. offline.
        val (onlineDevices, offlineDevices) = devices.partition { device -> device.isOnline }

        // Second split, applied to the online devices only: cameras vs. others.
        val (onlineCameras, onlineNonCameras) =
            onlineDevices.partition { device -> device.type == "camera" }

        println("在线摄像头: ${onlineCameras.size}")
        println("在线其他设备: ${onlineNonCameras.size}")
        println("离线设备: ${offlineDevices.size}")
    }

    // UI hooks: intentionally empty placeholders in this example.
    private fun updateOnlineUI(devices: List<Device>) {}

    private fun updateOfflineUI(devices: List<Device>) {}
}
// 验证场景:分区验证数据
/**
 * Validation example: splits input records into valid and invalid ones with
 * a single `partition` call and handles each half separately.
 */
class DataValidator {
    /** One input record; [value] is the field that gets validated. */
    data class InputData(
        val id: String,
        val value: String,
    )

    /**
     * Processes every valid record, logs the invalid ones (if any) and
     * finally prints how many records fell into each half.
     *
     * A record is valid when its value is non-empty and at most 100
     * characters long.
     */
    fun validateAndProcess(data: List<InputData>) {
        val (accepted, rejected) = data.partition { input -> isAcceptable(input) }

        for (item in accepted) {
            processValid(item)
        }

        if (rejected.isNotEmpty()) {
            logErrors(rejected)
        }

        println("有效: ${accepted.size}, 无效: ${rejected.size}")
    }

    /** True when the value length lies in 1..100 (non-empty and not longer than 100). */
    private fun isAcceptable(input: InputData): Boolean = input.value.length in 1..100

    private fun processValid(data: InputData) {
        println("处理有效数据: ${data.id}")
    }

    private fun logErrors(data: List<InputData>) {
        for (item in data) {
            println("无效数据: ${item.id}")
        }
    }
}
示例4:综合应用
// 场景:社交应用的消息处理
/**
 * Message-processing example for a chat-style application, combining
 * `groupBy`, `partition` and sorting.
 */
class MessageProcessor {
    /** A single message. */
    data class Message(
        val id: String,
        val senderId: String,
        val content: String,
        val timestamp: Long,
        val isRead: Boolean,
        val hasAttachment: Boolean,
    )

    /** All messages of one sender plus the number of unread ones. */
    data class Conversation(
        val userId: String,
        val messages: List<Message>,
        val unreadCount: Int,
    )

    /**
     * Builds one [Conversation] per sender.
     *
     * Within a conversation the messages are ordered newest first; the
     * conversations themselves are ordered by unread count, highest first
     * (the sort is stable, so ties keep first-seen sender order).
     */
    fun processMessages(messages: List<Message>): List<Conversation> =
        messages
            .groupBy { it.senderId } // 1. group by sender
            .map { (userId, userMessages) -> // 2. one conversation per group
                Conversation(
                    userId = userId,
                    messages = userMessages.sortedByDescending { it.timestamp },
                    // 3. Only the number of unread messages is needed, so count
                    //    them directly instead of partitioning into two lists
                    //    and discarding one of them.
                    unreadCount = userMessages.count { !it.isRead },
                )
            }
            .sortedByDescending { it.unreadCount }

    /**
     * Returns the messages that carry an attachment, grouped by sender.
     */
    fun extractAttachments(messages: List<Message>): Map<String, List<Message>> =
        messages
            .filter { it.hasAttachment }
            .groupBy { it.senderId }

    /**
     * Returns summary counters keyed by name: `total`, `withAttachment`,
     * `withoutAttachment`, `read`, `unread` and `uniqueUsers`.
     */
    fun analyzeMessages(messages: List<Message>): Map<String, Any> {
        // Each partition is traversed once and both halves are consumed.
        val (withAttachment, withoutAttachment) = messages.partition { it.hasAttachment }
        val (read, unread) = messages.partition { it.isRead }
        val byUser = messages.groupBy { it.senderId }

        return mapOf(
            "total" to messages.size,
            "withAttachment" to withAttachment.size,
            "withoutAttachment" to withoutAttachment.size,
            "read" to read.size,
            "unread" to unread.size,
            "uniqueUsers" to byUser.size,
        )
    }
}
// Android场景:完整的设备管理
/**
 * Example ViewModel that combines `flatMap`, `groupBy` and `partition` to
 * analyse devices together with their event histories.
 */
class DeviceManagementViewModel : ViewModel() {
    /** A device together with the events recorded for it. */
    data class DeviceDetail(
        val device: Device,
        val events: List<DeviceEvent>,
    )

    /** A single event of a device. */
    data class DeviceEvent(
        val type: String,
        val timestamp: Long,
        val message: String,
    )

    /**
     * Computes per-room device counts and per-device-type event statistics
     * for [deviceDetails] and prints both.
     */
    fun analyzeDeviceData(deviceDetails: List<DeviceDetail>) {
        // Steps 1-3 show each operation on its own; their results are
        // illustrative and are not consumed by the statistics below.

        // 1. flatMap: flatten the events of all devices into one list.
        val allEvents = deviceDetails.flatMap { it.events }

        // 2. groupBy: group those events by event type.
        val eventsByType = allEvents.groupBy { it.type }

        // 3. partition: split the details into online and offline devices.
        val (online, offline) = deviceDetails.partition { it.device.isOnline }

        // 4. Combined: online/offline/total device counts per room.
        val roomStats = deviceDetails
            .groupBy { it.device.room }
            .mapValues { (_, details) ->
                val (onlineInRoom, offlineInRoom) = details.partition { it.device.isOnline }
                mapOf(
                    "online" to onlineInRoom.size,
                    "offline" to offlineInRoom.size,
                    "total" to details.size,
                )
            }

        // 5. Combined: event statistics per device type.
        val eventStatsByType = deviceDetails
            .groupBy { it.device.type }
            .mapValues { (_, details) ->
                // Flatten once and reuse the list for both the total and the
                // per-event-type breakdown.
                val events = details.flatMap { it.events }
                val eventBreakdown = events
                    .groupBy { it.type }
                    .mapValues { (_, sameType) -> sameType.size }
                mapOf(
                    "totalEvents" to events.size,
                    "eventBreakdown" to eventBreakdown,
                )
            }

        logAnalysis(roomStats, eventStatsByType)
    }

    /** Prints the room statistics and the event statistics, one line each. */
    private fun logAnalysis(
        roomStats: Map<String, Map<String, Int>>,
        eventStats: Map<String, Map<String, Any>>
    ) {
        println("房间统计: $roomStats")
        println("事件统计: $eventStats")
    }
}
示例5:性能优化
// Sequence优化组合操作
/**
 * Contrasts eager collection pipelines with lazy `Sequence` pipelines.
 */
class OptimizedProcessor {
    /**
     * Builds the same "duplicate, group by type, split by online state"
     * result twice: once with eager list operations and once starting from a
     * sequence. Both results are local and are not returned.
     */
    fun processLargeDataset(devices: List<Device>) {
        // ❌ Eager: flatMap materialises an intermediate List before groupBy runs.
        val eagerResult = devices
            .flatMap { device -> listOf(device, device) }
            .groupBy { device -> device.type }
            .mapValues { (_, sameType) -> sameType.partition { device -> device.isOnline } }

        // ✅ Lazy: flatMap is deferred; groupBy is the terminal step that pulls the elements.
        val lazyResult = devices.asSequence()
            .flatMap { device -> sequenceOf(device, device) }
            .groupBy { device -> device.type }
            .mapValues { (_, sameType) -> sameType.partition { device -> device.isOnline } }
    }

    /**
     * Returns the first device that is a camera and online, or `null` if
     * there is none.
     *
     * ✅ The sequence is consumed lazily, so iteration stops at the first match.
     */
    fun findFirstOnlineCamera(users: List<User>): Device? {
        val allDevices = users.asSequence().flatMap { user -> user.devices.asSequence() }
        return allDevices.firstOrNull { device -> device.type == "camera" && device.isOnline }
    }

    /**
     * Same result as [findFirstOnlineCamera], written without a sequence.
     *
     * ❌ Every device of every user is flattened and filtered into
     * intermediate lists before the first match is looked up.
     */
    fun findFirstOnlineCameraNoSeq(users: List<User>): Device? {
        val allDevices = users.flatMap { user -> user.devices }
        val cameras = allDevices.filter { device -> device.type == "camera" }
        return cameras.firstOrNull { device -> device.isOnline }
    }
}
// 性能测试
/**
 * Rough timing comparison of "find the first online device" using an eager
 * list pipeline versus a lazy sequence pipeline over 1000 users with 100
 * devices each. Prints both elapsed times in milliseconds.
 */
fun performanceTest() {
    val users = List(1000) { userIndex ->
        val userDevices = List(100) { deviceIndex ->
            // Every tenth device is online.
            Device("D$deviceIndex", "Device$deviceIndex", "camera", "Room", deviceIndex % 10 == 0)
        }
        User(name = "User$userIndex", devices = userDevices)
    }

    // Eager: flattens and filters all devices before taking the first one.
    val listMillis = measureTimeMillis {
        users.flatMap { it.devices }.filter { it.isOnline }.firstOrNull()
    }

    // Lazy: stops as soon as the first online device has been produced.
    val sequenceMillis = measureTimeMillis {
        users.asSequence().flatMap { it.devices }.filter { it.isOnline }.firstOrNull()
    }

    println("List: ${listMillis}ms, Sequence: ${sequenceMillis}ms")
    // The article cites "List: 100ms, Sequence: 1ms" as a typical result;
    // this is a single unwarmed run, so actual numbers will vary by machine.
}
/**
 * Runs [block] once and returns the elapsed time in whole milliseconds.
 *
 * The measurement is based on `System.nanoTime()`, which is intended for
 * elapsed-time measurement, rather than on the wall clock, which may be
 * adjusted while [block] is running and then report a wrong or negative
 * duration.
 *
 * @param block the code to time
 * @return elapsed milliseconds, truncated toward zero
 */
fun measureTimeMillis(block: () -> Unit): Long {
    val startNanos = System.nanoTime()
    block()
    return (System.nanoTime() - startNanos) / 1_000_000
}
⚡ 关键要点
1. 何时使用flatMap/groupBy/partition
// Illustrative fragments: users, devices and data are assumed to be declared elsewhere.
// flatMap: a nested collection needs to be flattened
val allDevices = users.flatMap { it.devices }
// groupBy: elements need to be classified by some property
val byType = devices.groupBy { it.type }
// partition: elements need to be split into exactly two groups (matching vs. not matching)
val (valid, invalid) = data.partition { it.isValid }
// For more than two groups, use groupBy instead of repeated partition calls
// ✅ Recommended
val byStatus = devices.groupBy { it.status }
// ❌ Not recommended: each extra group needs another partition over the remainder
val (online, temp) = devices.partition { it.status == "online" }
val (offline, unknown) = temp.partition { it.status == "offline" }
2. groupBy vs partition对比
// Illustrative fragments: list, numbers, condition and key are placeholders.
// partition: always yields exactly two groups
val (group1, group2) = list.partition { condition }
// groupBy: can yield any number of groups
val groups = list.groupBy { key }
// Equivalent forms (alternatives: they reuse the names evens/odds, so they cannot share one scope)
val (evens, odds) = numbers.partition { it % 2 == 0 }
// is equivalent to
val grouped = numbers.groupBy { it % 2 == 0 }
// A key with no elements is absent from the map, hence the emptyList() fallback
val evens = grouped[true] ?: emptyList()
val odds = grouped[false] ?: emptyList()
3. 性能最佳实践
// Illustrative fragments: the empty lambdas are placeholders and `result` is declared twice on purpose.
// Small data set: use the collection operations directly
val result = smallList.flatMap { }.groupBy { }
// Large data set: go through a Sequence
val result = largeList.asSequence()
.flatMap { }
.groupBy { }
// Only part of the result is needed: Sequence + take
val first10 = hugeList.asSequence()
.flatMap { it.items }
.take(10)
.toList()
// Avoid unnecessary operations
// ❌ Not recommended: grouping and then flattening the groups again only
//    reproduces the original elements (reordered by group)
val result1 = items.groupBy { it.type }.values.flatten()
// ✅ Recommended: use the original collection directly
val result2 = items
4. 常见错误
// Illustrative fragments: devices and list are assumed to be declared elsewhere.
// Mistake 1: forgetting that a group may be missing after groupBy
val byType = devices.groupBy { it.type }
// val cameras = byType["camera"] // ❌ may be null when no element has that key
val cameras = byType["camera"] ?: emptyList() // ✅
// Mistake 2: returning a single element from the flatMap lambda
// ❌ Wrong: the flatMap lambda must return a collection
// val result = list.flatMap { it.value }
val result = list.flatMap { listOf(it.value) } // ✅
// Or simply use map
val result2 = list.map { it.value } // ✅ simpler
// Mistake 3: not destructuring the result of partition
val partitioned = list.partition { it.isValid }
val valid = partitioned.first // ❌ unclear which half is which
val invalid = partitioned.second
// ✅ Destructure instead
val (valid2, invalid2) = list.partition { it.isValid }
🔗 相关知识点
- #129 集合操作:map、filter、reduce - 基础集合操作
- #131 序列(Sequence)vs 集合 - 性能优化
- #121 高阶函数 - 函数式编程基础
总结
flatMap、groupBy、partition是处理复杂集合转换的利器。flatMap用于扁平化嵌套结构,groupBy用于分组聚合,partition用于二分数据。这三个函数可以组合使用,处理各种复杂的数据转换场景。在Android开发中,它们广泛应用于消息处理、设备管理、数据分析等场景。对于大数据集,结合Sequence可以显著提升性能。掌握这些高级集合操作,能写出更简洁、更高效的数据处理代码。
更多推荐


所有评论(0)