上海市救护车出租网

SQL语句查询连续N天登录用户(解决方案)

2026-03-27 15:28:02 浏览次数:0
详细信息

1. 自连接方案(适用于小数据量)

-- 查询连续3天登录的用户(方案1)
SELECT DISTINCT a.user_id
FROM login_log a
JOIN login_log b ON a.user_id = b.user_id
    AND b.login_date = DATE_ADD(a.login_date, INTERVAL 1 DAY)
JOIN login_log c ON a.user_id = c.user_id
    AND c.login_date = DATE_ADD(a.login_date, INTERVAL 2 DAY);

-- 方案2:使用窗口函数(推荐)
SELECT DISTINCT user_id
FROM (
    SELECT 
        user_id,
        login_date,
        DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) as diff
    FROM (
        SELECT DISTINCT user_id, login_date 
        FROM login_log
    ) t
) t2
GROUP BY user_id, diff
HAVING COUNT(*) >= 3;

2. 窗口函数方案(最常用且高效)

-- 方案1:使用ROW_NUMBER计算连续登录
WITH login_days AS (
    SELECT 
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) as rn
    FROM (
        SELECT DISTINCT user_id, DATE(login_time) as login_date
        FROM login_log
    ) t
)
SELECT DISTINCT user_id
FROM (
    SELECT 
        user_id,
        DATE_SUB(login_date, INTERVAL rn DAY) as group_date
    FROM login_days
) t
GROUP BY user_id, group_date
HAVING COUNT(*) >= 3;

-- 方案2:使用LAG函数
WITH consecutive_check AS (
    SELECT 
        user_id,
        login_date,
        LAG(login_date, 2) OVER (PARTITION BY user_id ORDER BY login_date) as prev_date2
    FROM (
        SELECT DISTINCT user_id, DATE(login_time) as login_date
        FROM login_log
    ) t
)
SELECT DISTINCT user_id
FROM consecutive_check
WHERE DATEDIFF(login_date, prev_date2) = 2;

3. 变量方案(MySQL特定)

-- 使用用户变量追踪连续天数
SELECT DISTINCT user_id
FROM (
    SELECT 
        user_id,
        login_date,
        @seq := IF(@prev_user = user_id AND DATEDIFF(login_date, @prev_date) = 1, @seq + 1, 1) as seq,
        @max_seq := IF(@seq > @max_seq, @seq, @max_seq) as max_seq,
        @prev_user := user_id,
        @prev_date := login_date
    FROM (
        SELECT DISTINCT user_id, DATE(login_time) as login_date
        FROM login_log
        ORDER BY user_id, login_date
    ) t
    CROSS JOIN (SELECT @prev_user := NULL, @prev_date := NULL, @seq := 0, @max_seq := 0) vars
) t2
WHERE max_seq >= 3;

4. 完整的通用解决方案

-- 查询连续N天登录的用户(通用版本)
WITH daily_login AS (
    -- 去重,确保每天只算一次登录
    SELECT DISTINCT 
        user_id,
        DATE(login_time) as login_date
    FROM login_log
),
login_groups AS (
    -- 计算连续登录分组
    SELECT 
        user_id,
        login_date,
        DATE_SUB(login_date, 
                INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY
        ) as group_id
    FROM daily_login
),
consecutive_counts AS (
    -- 统计每组连续登录天数
    SELECT 
        user_id,
        group_id,
        MIN(login_date) as start_date,
        MAX(login_date) as end_date,
        COUNT(*) as consecutive_days,
        GROUP_CONCAT(login_date ORDER BY login_date) as login_dates
    FROM login_groups
    GROUP BY user_id, group_id
)
-- 查询连续N天登录的用户
SELECT DISTINCT user_id
FROM consecutive_counts
WHERE consecutive_days >= 3  -- N = 3
ORDER BY user_id;

-- 如果需要查看更多信息
SELECT *
FROM consecutive_counts
WHERE consecutive_days >= 3
ORDER BY user_id, start_date;

5. 扩展功能:查找最长连续登录天数

-- 查找每个用户的最长连续登录天数
WITH daily_login AS (
    SELECT DISTINCT user_id, DATE(login_time) as login_date
    FROM login_log
),
login_groups AS (
    SELECT 
        user_id,
        login_date,
        DATE_SUB(login_date, 
                INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY
        ) as group_id
    FROM daily_login
),
consecutive_counts AS (
    SELECT 
        user_id,
        COUNT(*) as consecutive_days,
        MIN(login_date) as start_date,
        MAX(login_date) as end_date
    FROM login_groups
    GROUP BY user_id, group_id
)
SELECT 
    user_id,
    MAX(consecutive_days) as max_consecutive_days
FROM consecutive_counts
GROUP BY user_id
ORDER BY max_consecutive_days DESC;

6. 考虑日期中断的解决方案

-- 允许中断1天的连续登录(例如:周一到周三,跳过周二也算连续)
WITH daily_login AS (
    SELECT DISTINCT user_id, DATE(login_time) as login_date
    FROM login_log
),
login_sequence AS (
    SELECT 
        user_id,
        login_date,
        LAG(login_date) OVER (PARTITION BY user_id ORDER BY login_date) as prev_date
    FROM daily_login
),
login_gaps AS (
    SELECT 
        user_id,
        login_date,
        CASE 
            WHEN prev_date IS NULL THEN 1
            WHEN DATEDIFF(login_date, prev_date) <= 2 THEN 0  -- 允许中断1天
            ELSE 1
        END as new_group
    FROM login_sequence
),
login_groups AS (
    SELECT 
        user_id,
        login_date,
        SUM(new_group) OVER (PARTITION BY user_id ORDER BY login_date) as group_id
    FROM login_gaps
)
SELECT DISTINCT user_id
FROM (
    SELECT 
        user_id,
        group_id,
        COUNT(*) as consecutive_days
    FROM login_groups
    GROUP BY user_id, group_id
    HAVING COUNT(*) >= 3
) t;

性能优化建议

建立索引

CREATE INDEX idx_user_login ON login_log(user_id, login_time);
CREATE INDEX idx_date ON login_log(login_time);

预先去重:在子查询中使用DISTINCT减少处理数据量

分区表:如果数据量大,考虑按日期分区

物化视图:对于频繁查询,可以创建物化视图存储每日活跃用户

注意事项

确保登录时间字段为日期时间类型 考虑时区问题 处理NULL值和重复数据 根据实际业务需求调整"连续"的定义(是否允许跨天等)

推荐使用窗口函数方案,它在现代数据库系统中性能最好,代码也最简洁。

相关推荐