mirror of
https://github.com/zulip/zulip.git
synced 2026-06-18 21:01:52 +08:00
analytics: Add class DependentCountStat and stat realm_active_humans::day.
This commit is contained in:
parent
62de1cf898
commit
49bd330304
@ -4,15 +4,16 @@ from django.db.models import F
|
||||
from django.utils import timezone
|
||||
|
||||
from analytics.models import InstallationCount, RealmCount, \
|
||||
UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch
|
||||
UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch, \
|
||||
last_successful_fill
|
||||
from zerver.models import Realm, UserProfile, Message, Stream, \
|
||||
UserActivityInterval, RealmAuditLog, models
|
||||
from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
|
||||
ceiling_to_hour
|
||||
|
||||
from typing import Any, Callable, Dict, Optional, Text, Tuple, Type, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Type, Union
|
||||
|
||||
from collections import defaultdict
|
||||
from collections import defaultdict, OrderedDict
|
||||
from datetime import timedelta, datetime
|
||||
import logging
|
||||
import time
|
||||
@ -64,6 +65,12 @@ class LoggingCountStat(CountStat):
|
||||
# type: (str, Type[BaseCount], str) -> None
|
||||
CountStat.__init__(self, property, DataCollector(output_table, None), frequency)
|
||||
|
||||
class DependentCountStat(CountStat):
    """A CountStat whose values are derived from the output of other stats.

    `dependencies` holds the property names of the stats this one reads from.
    process_count_stat looks up last_successful_fill for each of them and
    never fills this stat past the earliest of those times; if any dependency
    has no successful fill yet, it logs a warning and does nothing.
    """
    def __init__(self, property, data_collector, frequency, interval=None, dependencies=None):
        # type: (str, DataCollector, str, Optional[timedelta], Optional[List[str]]) -> None
        CountStat.__init__(self, property, data_collector, frequency, interval=interval)
        # Use a None sentinel rather than a `[]` default: a list default is
        # created once at function definition time, so every stat built
        # without explicit dependencies would otherwise share (and could
        # mutate) the very same list object.
        self.dependencies = [] if dependencies is None else dependencies
|
||||
|
||||
class DataCollector(object):
|
||||
def __init__(self, output_table, pull_function):
|
||||
# type: (Type[BaseCount], Optional[Callable[[str, datetime, datetime], int]]) -> None
|
||||
@ -92,6 +99,15 @@ def process_count_stat(stat, fill_to_time):
|
||||
else:
|
||||
raise AssertionError("Unknown value for FillState.state: %s." % (fill_state.state,))
|
||||
|
||||
if isinstance(stat, DependentCountStat):
|
||||
for dependency in stat.dependencies:
|
||||
dependency_fill_time = last_successful_fill(dependency)
|
||||
if dependency_fill_time is None:
|
||||
logger.warning("DependentCountStat %s run before dependency %s." %
|
||||
(stat.property, dependency))
|
||||
return
|
||||
fill_to_time = min(fill_to_time, dependency_fill_time)
|
||||
|
||||
currently_filled = currently_filled + timedelta(hours = 1)
|
||||
while currently_filled <= fill_to_time:
|
||||
logger.info("START %s %s" % (stat.property, currently_filled))
|
||||
@ -229,8 +245,8 @@ def do_pull_by_sql_query(property, start_time, end_time, query, group_by):
|
||||
|
||||
# We do string replacement here because cursor.execute will reject a
|
||||
# group_by_clause given as a param.
|
||||
# We pass in the datetimes as params so that we don't have to think about
|
||||
# how to convert python datetimes to SQL datetimes.
|
||||
# We pass in the datetimes as params to cursor.execute so that we don't have to
|
||||
# think about how to convert python datetimes to SQL datetimes.
|
||||
query_ = query % {'property': property, 'subgroup': subgroup,
|
||||
'group_by_clause': group_by_clause}
|
||||
cursor = connection.cursor()
|
||||
@ -407,6 +423,31 @@ check_useractivityinterval_by_user_query = """
|
||||
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
||||
"""
|
||||
|
||||
count_realm_active_humans_query = """
|
||||
INSERT INTO analytics_realmcount
|
||||
(realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
usercount1.realm_id, count(*), '%(property)s', NULL, %%(time_end)s
|
||||
FROM (
|
||||
SELECT realm_id, user_id
|
||||
FROM analytics_usercount
|
||||
WHERE
|
||||
property = 'active_users_audit:is_bot:day' AND
|
||||
subgroup = 'false' AND
|
||||
end_time = %%(time_end)s
|
||||
) usercount1
|
||||
JOIN (
|
||||
SELECT realm_id, user_id
|
||||
FROM analytics_usercount
|
||||
WHERE
|
||||
property = '15day_actives::day' AND
|
||||
end_time = %%(time_end)s
|
||||
) usercount2
|
||||
ON
|
||||
usercount1.user_id = usercount2.user_id
|
||||
GROUP BY usercount1.realm_id
|
||||
"""
|
||||
|
||||
# Currently unused and untested
|
||||
count_stream_by_realm_query = """
|
||||
INSERT INTO analytics_realmcount
|
||||
@ -450,6 +491,7 @@ count_stats_ = [
|
||||
# latter stat was introduced.
|
||||
# 'active_users_audit:is_bot:day' is the canonical record of which users were
|
||||
# active on which days (in the UserProfile.is_active sense).
|
||||
# Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
|
||||
CountStat('active_users_audit:is_bot:day',
|
||||
sql_data_collector(UserCount, check_realmauditlog_by_user_query, (UserProfile, 'is_bot')),
|
||||
CountStat.DAY),
|
||||
@ -460,7 +502,13 @@ count_stats_ = [
|
||||
CountStat('15day_actives::day',
|
||||
sql_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
|
||||
CountStat.DAY, interval=timedelta(days=15)-timedelta(minutes=15)),
|
||||
CountStat('minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY)
|
||||
CountStat('minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY),
|
||||
|
||||
# Canonical account of the number of active humans in a realm on each day.
|
||||
DependentCountStat('realm_active_humans::day',
|
||||
sql_data_collector(RealmCount, count_realm_active_humans_query, None),
|
||||
CountStat.DAY,
|
||||
dependencies=['active_users_audit:is_bot:day', '15day_actives::day'])
|
||||
]
|
||||
|
||||
COUNT_STATS = {stat.property: stat for stat in count_stats_}
|
||||
COUNT_STATS = OrderedDict([(stat.property, stat) for stat in count_stats_])
|
||||
|
||||
@ -9,17 +9,19 @@ from django.utils import timezone
|
||||
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
|
||||
do_fill_count_stat_at_hour, do_increment_logging_stat, DataCollector, \
|
||||
sql_data_collector, LoggingCountStat, do_aggregate_to_summary_table, \
|
||||
do_drop_all_analytics_tables
|
||||
do_drop_all_analytics_tables, DependentCountStat
|
||||
from analytics.models import BaseCount, InstallationCount, RealmCount, \
|
||||
UserCount, StreamCount, FillState, Anomaly, installation_epoch
|
||||
UserCount, StreamCount, FillState, Anomaly, installation_epoch, \
|
||||
last_successful_fill
|
||||
from zerver.lib.actions import do_create_user, do_deactivate_user, \
|
||||
do_activate_user, do_reactivate_user
|
||||
do_activate_user, do_reactivate_user, update_user_activity_interval
|
||||
from zerver.lib.timestamp import floor_to_day
|
||||
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
|
||||
Huddle, Client, UserActivityInterval, RealmAuditLog, \
|
||||
get_user_profile_by_email, get_client
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import ujson
|
||||
|
||||
from six.moves import range
|
||||
from typing import Any, Dict, List, Optional, Text, Tuple, Type, Union
|
||||
@ -238,6 +240,42 @@ class TestProcessCountStat(AnalyticsTestCase):
|
||||
self.assertTableState(InstallationCount, ['property', 'value'],
|
||||
[[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]])
|
||||
|
||||
def test_process_dependent_stat(self):
    # type: () -> None
    # Exercises process_count_stat on a DependentCountStat ('stat3') that
    # depends on two dummy stats ('stat1' and 'stat2').
    first_dependency = self.make_dummy_count_stat('stat1')
    second_dependency = self.make_dummy_count_stat('stat2')
    insert_query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat3')
    collector = sql_data_collector(RealmCount, insert_query, None)
    dependent_stat = DependentCountStat('stat3', collector, CountStat.HOUR,
                                        dependencies=['stat1', 'stat2'])
    hours = [installation_epoch() + offset*self.HOUR for offset in range(5)]
    columns = ['property', 'end_time']

    # Rows each dependency is expected to have produced once it has run.
    stat1_rows = [['stat1', hours[1]], ['stat1', hours[2]]]
    stat2_rows = [['stat2', hours[1]], ['stat2', hours[2]], ['stat2', hours[3]]]

    # test when one dependency has been run, and the other hasn't
    process_count_stat(first_dependency, hours[2])
    process_count_stat(dependent_stat, hours[1])
    self.assertTableState(InstallationCount, columns, stat1_rows)
    self.assertFillStateEquals(dependent_stat, hours[0])

    # test that we don't fill past the fill_to_time argument, even if
    # dependencies have later last_successful_fill
    process_count_stat(second_dependency, hours[3])
    process_count_stat(dependent_stat, hours[1])
    self.assertTableState(InstallationCount, columns,
                          stat1_rows + stat2_rows + [['stat3', hours[1]]])
    self.assertFillStateEquals(dependent_stat, hours[1])

    # test that we don't fill past the dependency last_successful_fill times,
    # even if fill_to_time is later
    process_count_stat(dependent_stat, hours[4])
    self.assertTableState(InstallationCount, columns,
                          stat1_rows + stat2_rows +
                          [['stat3', hours[1]], ['stat3', hours[2]]])
    self.assertFillStateEquals(dependent_stat, hours[2])
|
||||
|
||||
class TestCountStats(AnalyticsTestCase):
|
||||
def setUp(self):
|
||||
# type: () -> None
|
||||
@ -886,3 +924,104 @@ class TestActiveUsersAudit(AnalyticsTestCase):
|
||||
user=user, property=self.current_property, subgroup='false',
|
||||
end_time=end_time, value=1).exists())
|
||||
self.assertFalse(UserCount.objects.filter(user=user2).exists())
|
||||
|
||||
class TestRealmActiveHumans(AnalyticsTestCase):
    # Tests for the 'realm_active_humans::day' stat. Most tests write the
    # UserCount rows of the two stats it reads from directly, then fill the
    # stat for individual days and inspect the resulting RealmCount rows.

    def setUp(self):
        # type: () -> None
        super(TestRealmActiveHumans, self).setUp()
        self.stat = COUNT_STATS['realm_active_humans::day']
        self.current_property = self.stat.property

    def mark_audit_active(self, user, end_time=None):
        # type: (UserProfile, Optional[datetime]) -> None
        # Record `user` in 'active_users_audit:is_bot:day' at end_time
        # (self.TIME_ZERO when omitted); the subgroup is the JSON-encoded
        # is_bot flag.
        when = self.TIME_ZERO if end_time is None else end_time
        UserCount.objects.create(
            user=user, realm=user.realm, property='active_users_audit:is_bot:day',
            subgroup=ujson.dumps(user.is_bot), end_time=when, value=1)

    def mark_15day_active(self, user, end_time=None):
        # type: (UserProfile, Optional[datetime]) -> None
        # Record `user` in '15day_actives::day' at end_time (self.TIME_ZERO
        # when omitted).
        when = self.TIME_ZERO if end_time is None else end_time
        UserCount.objects.create(
            user=user, realm=user.realm, property='15day_actives::day',
            end_time=when, value=1)

    def _fill_stat_around_time_zero(self):
        # type: () -> None
        # Fill the stat for the day before, the day of, and the day after
        # self.TIME_ZERO, in that order.
        for day_offset in [-1, 0, 1]:
            do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + day_offset*self.DAY)

    def test_basic_boolean_logic(self):
        # type: () -> None
        # Only the day carrying BOTH kinds of row should be counted.
        human = self.create_user()
        yesterday = self.TIME_ZERO - self.DAY
        today = self.TIME_ZERO
        tomorrow = self.TIME_ZERO + self.DAY
        self.mark_audit_active(human, end_time=yesterday)
        self.mark_15day_active(human, end_time=today)
        self.mark_audit_active(human, end_time=tomorrow)
        self.mark_15day_active(human, end_time=tomorrow)

        self._fill_stat_around_time_zero()
        self.assertTableState(RealmCount, ['value', 'end_time'], [[1, tomorrow]])

    def test_bots_not_counted(self):
        # type: () -> None
        robot = self.create_user(is_bot=True)
        self.mark_audit_active(robot)
        self.mark_15day_active(robot)
        do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
        self.assertTableState(RealmCount, [], [])

    def test_multiple_users_realms_and_times(self):
        # type: () -> None
        user1 = self.create_user()
        user2 = self.create_user()
        second_realm = Realm.objects.create(string_id='second', name='second')
        user3 = self.create_user(realm=second_realm)
        user4 = self.create_user(realm=second_realm)
        user5 = self.create_user(realm=second_realm)
        day_before = self.TIME_ZERO - self.DAY
        day_after = self.TIME_ZERO + self.DAY
        columns = ['value', 'realm', 'end_time']

        def expected_rows():
            # type: () -> List[List[Any]]
            # Built afresh for every assertion so the two checks never share
            # list objects.
            return [[2, self.default_realm, self.TIME_ZERO],
                    [3, second_realm, self.TIME_ZERO],
                    [1, self.default_realm, day_before],
                    [2, second_realm, day_before]]

        for member in [user1, user2, user3, user4, user5]:
            self.mark_audit_active(member)
            self.mark_15day_active(member)
        for member in [user1, user3, user4]:
            self.mark_audit_active(member, end_time=day_before)
            self.mark_15day_active(member, end_time=day_before)

        self._fill_stat_around_time_zero()
        self.assertTableState(RealmCount, columns, expected_rows())

        # Check that adding spurious entries doesn't make a difference
        self.mark_audit_active(user1, end_time=day_after)
        self.mark_15day_active(user2, end_time=day_after)
        self.mark_15day_active(user2, end_time=day_before)
        self.create_user()
        third_realm = Realm.objects.create(string_id='third', name='third')
        self.create_user(realm=third_realm)

        RealmCount.objects.all().delete()
        self._fill_stat_around_time_zero()
        self.assertTableState(RealmCount, columns, expected_rows())

    def test_end_to_end(self):
        # type: () -> None
        # Drives the real dependency stats and the dependent stat through
        # process_count_stat instead of writing UserCount rows by hand.
        user1 = do_create_user('email1', 'password', self.default_realm, 'full_name', 'short_name')
        user2 = do_create_user('email2', 'password', self.default_realm, 'full_name', 'short_name')
        do_create_user('email3', 'password', self.default_realm, 'full_name', 'short_name')
        time_zero = floor_to_day(timezone.now()) + self.DAY
        update_user_activity_interval(user1, time_zero)
        update_user_activity_interval(user2, time_zero)
        do_deactivate_user(user2)

        fill_to = time_zero+self.DAY
        # The dependencies are listed (and therefore processed) before the
        # stat that reads from them.
        stat_names = ['active_users_audit:is_bot:day', '15day_actives::day',
                      'realm_active_humans::day']
        for stat_name in stat_names:
            FillState.objects.create(property=stat_name, state=FillState.DONE, end_time=time_zero)
            process_count_stat(COUNT_STATS[stat_name], fill_to)

        matching_rows = RealmCount.objects.filter(
            property='realm_active_humans::day', end_time=fill_to, value=1)
        self.assertEqual(matching_rows.count(), 1)
        all_rows = RealmCount.objects.filter(property='realm_active_humans::day')
        self.assertEqual(all_rows.count(), 1)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user