1.推荐系统上线技术方案
出入参设计
/**
 * Request parameters for the personalized preparation-resource recommendation
 * (name-based reading; this document lists it under "input/output parameter design").
 *
 * Only {@code subjectCode} and {@code phaseCode} carry a validation constraint here;
 * every other field is unconstrained in this declaration.
 */
public class PreparationResourcesPersonalityRequest{
/**
 * Chapter directory code.
 */
private String chapterCode;
/**
 * Textbook code.
 */
private String testBookCode;
/**
 * Subject code. Must not be null.
 */
@NotNull(message = "学科编码不能为null")
private String subjectCode;
/**
 * School phase (stage) code. Must not be null.
 */
@NotNull(message = "学段编码不能为null")
private String phaseCode;
/**
 * Grade.
 */
private String grade;
/**
 * Resource type - first-level resource directory.
 */
private String firstApplicationDirectory;
/**
 * Resource type - technology application type.
 */
private String technologyApplicationType;
/**
 * User id.
 */
private String userId;
/**
 * Id of the user's school.
 */
private String userSchoolId;
/**
 * Province code of the user.
 */
private String userProvinceCode;
/**
 * City code of the user.
 */
private String userCityCode;
/**
 * Area (district) code of the user.
 */
private String userAreaCode;
/**
 * De-duplication position/index.
 * NOTE(review): exact semantics are not visible in this file - TODO confirm with the caller.
 */
private Integer deDuplicationIndex;
/**
 * Current page number. Defaults to 1.
 */
private Integer currentPage = 1;
/**
 * Number of items per page. Defaults to 20.
 */
private Integer pageSize = 20;
/**
 * Resource filter. Non-empty: recommend resources whose usedRange intersects with this list;
 * empty: recommend resources whose usedRange is "01".
 *
 * @deprecated marked as abandoned in the original note; presumably superseded by
 *             {@code resources} - TODO confirm.
 */
@Deprecated
private List<String> resKey;
/**
 * Resource filter.
 */
private List<PreparationResources> resources;
}
/**
 * Input parameter of the class-resource recommendation model: a trace id plus a list of
 * feature rows ({@link UserClassFeature}).
 *
 * NOTE(review): the nested class is annotated with {@code @Data} but this outer class is not,
 * while the usage sketch in this document calls {@code setTraceId(...)} and
 * {@code setFeatures(...)}. Presumably accessors are generated or declared elsewhere - confirm.
 */
public class ClassResRecParam {
/**
 * Trace id. Must not be empty.
 */
private String traceId;
/**
 * Feature list. Must not be empty.
 */
private List<UserClassFeature> features;
/**
 * One feature row. Fields are split into statistical features (declared as {@code float})
 * and name/categorical features (declared as {@code long}).
 * NOTE(review): the name-type fields are numeric, so they are presumably encoded ids of the
 * corresponding names - TODO confirm the encoding with the model owner.
 */
@Data
public static class UserClassFeature {
/* Statistical features */
private float resViewCount;
private float resLast7DViewCount;
private float resLast30DViewCount;
private float resYtdViewCount;
private float resReferenceCount;
private float resLast7DReferenceCount;
private float resLast30DReferenceCount;
private float resYtdReferenceCount;
private float userres7DViewCount;
private float userres7DReferenceCount;
private float userres7DEbookViewCount;
private float userres7DEbookReferenceCount;
private float userres30DViewCount;
private float userres30DReferenceCount;
private float userres30DEbookViewCount;
private float userres30DEbookReferenceCount;
private float userViewUnitCount;
private float userViewResourceCount;
private float userReferenceUnitCount;
private float userReferenceResourceCount;
private float userActiveDayCount;
private float userMondayActiveDayCount;
private float userTuesdayActiveDayCount;
private float userWednesdayActiveDayCount;
private float userThursdayActiveDayCount;
private float userFridayActiveDayCount;
private float userSaturdayActiveDayCount;
private float userSundayActiveDayCount;
private float userLast30DMinSincePriorActiveDayCount;
private float userLast30DMaxSincePriorActiveDayCount;
private float userLast30DAvgSincePriorActiveDayCount;
private float userMinSincePriorActiveDayCount;
private float userMaxSincePriorActiveDayCount;
private float userAvgSincePriorActiveDayCount;
private float userViewCostDayCount;
private float userReferenceCostDayCount;
private float resSize;
private float resDuration;
private float pdateDays;
private float resCreateTimeDays;
private float resUpdateTimeDays;
private float resFirstPdateDays;
private float resLastPdateDays;
/* Name-type (categorical) features */
private long userId;
private long classId;
private long bookCode;
private long bookUnit;
private long moduleId;
private long provinceName;
private long cityName;
private long districtName;
private long schoolName;
private long campusName;
private long appVersion;
private long resResourceTitle;
private long resPublisherName;
private long resVolumnName;
private long resStageName;
private long resQualityLabel;
private long resProjectName;
private long resSourceName;
private long resPhaseName;
private long resGradeName;
private long resSubjectName;
private long resEditionName;
private long resStatus;
private long resExtension;
private long resAuthorName;
private long resUploaderName;
private long resTechTypeName;
private long resUseTypeName;
private long resSubUseTypeName;
private long userViewTop3Extension;
private long userReferenceTop3Extension;
private long userViewTop1Extension;
private long userViewTop2Extension;
private long userReferenceTop1Extension;
private long userReferenceTop2Extension;
private long pdateYear;
private long pdateMonth;
private long pdateWeek;
private long pdateMday;
private long resCreateTimeYear;
private long resCreateTimeMonth;
private long resCreateTimeWeek;
private long resCreateTimeMday;
private long resUpdateTimeYear;
private long resUpdateTimeMonth;
private long resUpdateTimeWeek;
private long resUpdateTimeMday;
private long resFirstPdateYear;
private long resFirstPdateMonth;
private long resFirstPdateWeek;
private long resFirstPdateMday;
private long resLastPdateYear;
private long resLastPdateMonth;
private long resLastPdateWeek;
private long resLastPdateMday;
// Resource id, followed by the lag1/lag2/lag3 resource ids
// (presumably the previously used resources - TODO confirm).
private long resourceId;
private long lag1ResourceId;
private long lag2ResourceId;
private long lag3ResourceId;
}
}
流程:
注意:1.出入参一致性同步
2. Python脚本转成Java (预计2天)
- 提前预处理
- 使用scala
3. 脚本耗时
流程注意事项:
1.用户冷启动问题
2.线上数据查询方案验证
业务性能指标: 1.5s 以内
查出40个id :1s内
查询耗时:1s内 当前耗时:20s
策略:
- 剥离交互信息
- 剥离left join 变成点查
- 非关系型数据库
- 缓存策略:用户请求
时间点:今天
优化方案:
- 先过滤数据 不要有join group sum
结果:
t2表查询耗时:平均0.9s 数据量:79W
t3表查询耗时:平均0.23s 数据量:36W
t4表查询耗时:平均1.5s 数据量:190W
3.缓存方案及验证
PUT test_cache
{
"mappings": {
"properties": {
"user_id": {
"type": "keyword"
},
"province_id": {
"type": "keyword"
},
"province_name": {
"type": "keyword"
},
"city_id": {
"type": "keyword"
},
"city_name": {
"type": "keyword"
},
"district_id": {
"type": "keyword"
},
"district_name": {
"type": "keyword"
},
"school_id": {
"type": "keyword"
},
"school_name": {
"type": "keyword"
},
"campus_id": {
"type": "keyword"
},
"campus_name": {
"type": "keyword"
},
"book_code": {
"type": "keyword"
},
"unit1": {
"type": "keyword"
},
"unit2": {
"type": "keyword"
},
"unit3": {
"type": "keyword"
},
"unit4": {
"type": "keyword"
},
"book_name": {
"type": "keyword"
},
"resource_id": {
"type": "keyword"
},
"module_id": {
"type": "keyword"
},
"app_version": {
"type": "keyword"
},
"lag1_resource_id": {
"type": "keyword"
},
"lag2_resource_id": {
"type": "keyword"
},
"lag3_resource_id": {
"type": "keyword"
},
"book_unit": {
"type": "keyword"
},
"res_resource_title": {
"type": "keyword"
},
"res_publisher_code": {
"type": "keyword"
},
"res_publisher_name": {
"type": "keyword"
},
"res_volumn_code": {
"type": "keyword"
},
"res_volumn_name": {
"type": "keyword"
},
"res_stage_code": {
"type": "keyword"
},
"res_stage_name": {
"type": "keyword"
},
"res_create_time": {
"type": "keyword"
},
"res_update_time": {
"type": "keyword"
},
"res_type_code": {
"type": "keyword"
},
"res_quality_label": {
"type": "keyword"
},
"res_project_code": {
"type": "keyword"
},
"res_project_name": {
"type": "keyword"
},
"res_source_code": {
"type": "keyword"
},
"res_source_name": {
"type": "keyword"
},
"res_phase_code": {
"type": "keyword"
},
"res_phase_name": {
"type": "keyword"
},
"res_grade_code": {
"type": "keyword"
},
"res_grade_name": {
"type": "keyword"
},
"res_subject_code": {
"type": "keyword"
},
"res_subject_name": {
"type": "keyword"
},
"res_edition_code": {
"type": "keyword"
},
"res_edition_name": {
"type": "keyword"
},
"res_status": {
"type": "keyword"
},
"res_file_name": {
"type": "keyword"
},
"res_extension": {
"type": "keyword"
},
"res_author_id": {
"type": "keyword"
},
"res_author_name": {
"type": "keyword"
},
"res_uploader_id": {
"type": "keyword"
},
"res_uploader_name": {
"type": "keyword"
},
"res_tech_type_code": {
"type": "keyword"
},
"res_tech_type_name": {
"type": "keyword"
},
"res_use_type_code": {
"type": "keyword"
},
"res_use_type_name": {
"type": "keyword"
},
"res_sub_use_type_code": {
"type": "keyword"
},
"res_sub_use_type_name": {
"type": "keyword"
},
"res_size": {
"type": "keyword"
},
"res_duration": {
"type": "keyword"
},
"res_first_pdate": {
"type": "keyword"
},
"res_last_pdate": {
"type": "keyword"
},
"res_view_count": {
"type": "keyword"
},
"res_last7d_view_count": {
"type": "keyword"
},
"res_last30d_view_count": {
"type": "keyword"
},
"res_ytd_view_count": {
"type": "keyword"
},
"res_reference_count": {
"type": "keyword"
},
"res_last7d_reference_count": {
"type": "keyword"
},
"res_last30d_reference_count": {
"type": "keyword"
},
"res_ytd_reference_count": {
"type": "keyword"
},
"userres7d_view_count": {
"type": "keyword"
},
"userres7d_reference_count": {
"type": "keyword"
},
"userres7d_ebook_view_count": {
"type": "keyword"
},
"userres7d_ebook_reference_count": {
"type": "keyword"
},
"userres30d_view_count": {
"type": "keyword"
},
"userres30d_reference_count": {
"type": "keyword"
},
"userres30d_ebook_view_count": {
"type": "keyword"
},
"userres30d_ebook_reference_count": {
"type": "keyword"
},
"user_view_unit_count": {
"type": "keyword"
},
"user_view_cost_day_count": {
"type": "keyword"
},
"user_view_resource_count": {
"type": "keyword"
},
"user_view_top1_extension": {
"type": "keyword"
},
"user_view_top2_extension": {
"type": "keyword"
},
"user_view_top3_extension": {
"type": "keyword"
},
"user_reference_unit_count": {
"type": "keyword"
},
"user_reference_cost_day_count": {
"type": "keyword"
},
"user_reference_resource_count": {
"type": "keyword"
},
"user_reference_top1_extension": {
"type": "keyword"
},
"user_reference_top2_extension": {
"type": "keyword"
},
"user_reference_top3_extension": {
"type": "keyword"
},
"user_active_day_count": {
"type": "keyword"
},
"user_monday_active_day_count": {
"type": "keyword"
},
"user_tuesday_active_day_count": {
"type": "keyword"
},
"user_wednesday_active_day_count": {
"type": "keyword"
},
"user_thursday_active_day_count": {
"type": "keyword"
},
"user_friday_active_day_count": {
"type": "keyword"
},
"user_saturday_active_day_count": {
"type": "keyword"
},
"user_sunday_active_day_count": {
"type": "keyword"
},
"user_last30d_min_since_prior_active_day_count": {
"type": "keyword"
},
"user_last30d_max_since_prior_active_day_count": {
"type": "keyword"
},
"user_last30d_avg_since_prior_active_day_count": {
"type": "keyword"
},
"user_min_since_prior_active_day_count": {
"type": "keyword"
},
"user_max_since_prior_active_day_count": {
"type": "keyword"
},
"user_avg_since_prior_active_day_count": {
"type": "keyword"
}
}
}
}
测试:使用es 80w数据 40个资源 es terms耗时6ms
GET act_di3/_search
{
"query": {
"terms": {
"resource_id": [
"000000a4207c4662808f602070c95d07",
"00000378a3f94586ac8a45884108373f",
"0000045f2a034744a0e58be41f03415f",
"000004f6fda14f5190a5c54a132aa22e",
"0000050dbd81473187a73c3b41caa415",
"000005ecfea74f10b8d98eff31bd2905",
"00000727387b49acb3b09253cf45d96c",
"0000089f30bb491baf2a38f577dab87a",
"000009a387014ab38f872fa36bacd79a",
"00000c459dc94476a0220ddd13d35158",
"00000cfe9e4d4ad6863aeaa5c6808372",
"00000f162f4644f3be0e11d1145b612d",
"00000ff923624941a03e25d76c33202b",
"0000117eb77a4311a69684d181ed2fc3",
"00001463a0bb41d990927140cc1d506e",
"000014bf0eb34db9948ef6a45d80b29e",
"0000194250844192a92af9bad59bd561",
"00001954282a43f3bea28cde34e48b2a",
"00001c34d05d4fc8b662bc50085650df",
"00001f77c1ed4432a04c8eb11fcfaaa6",
"00001fd977af4c8e915f62baaf468c92",
"000020e4c84d4687aef906aff4bbf9cc",
"000021b025bf11e899f0a0d37a6353f2",
"0000235ab8294b7c9ce90566ccd41dc5",
"000026344a584a45845efea5e39085e7",
"0000273758bc46b293b6c12e9bd482e0",
"00002768fe0442e784ffb479f94265c2",
"000027bf6f4b46b986026c9b7d7daecf",
"0000284a01a04c129c6dcec0a7443f9e",
"00002a281d9444e6a9f0b8e8170b6975",
"00002ad7cb4c4b299971cc0b393579c5",
"00002b875bb6425184ef9a012ff69947",
"00002bec27d048aa9df9e4c42c8b0fec",
"00002caf8e7e4871b121e887ef1df792",
"00002e4be7744ce6a61fdf344f82bd08",
"00002e8d85f24f78b1136c0e744e4ed8",
"0000314fdc7e494291847244ce12fefe",
"000031ae86734541a842beb6b87066db",
"0000332ec0124ef9adfb8c008543838e",
"00003444d60b4ae8815fa48dc1adc6af"
]
}
}
}
缓存表验证:
目前数据只插入30W,查询40个资源id,耗时7ms
需求:需要将数据(800W)插入ES
时间点:今天之前
缓存表覆盖度约56%情况下:
数据量:8284000
效果 :1074ms
缓存表覆盖度80+% 数据量 效果
ES极限
ES分开点查: 多个索引- MySql数据插入到Es:
// // Usage sketch (kept commented out): open the model, build its input from the
// // online-computed responses, run predict 10 times and print the timing of each run.
// Stopwatch sw = Stopwatch.createStarted();
// IClassResRec tmp = ClassResRecRawImpl.open();
// sw.stop();
// // Plain concatenation: System.out.println does not expand "{}" logger placeholders.
// System.out.println("初始化耗时:" + sw.elapsed(TimeUnit.MILLISECONDS) + "ms");
// // Convert the online-computed results into the model's input parameter.
// ClassResRecParam param = new ClassResRecParam();
// ArrayList<ClassResRecParam.UserClassFeature> paramList = new ArrayList<>();
// for (PersonalizeResourcesResponse personalizeResourcesResponse : responseList) {
//     // Create a fresh feature object per response. Reusing one instance created outside
//     // the loop would make every list entry reference the same object, so all rows would
//     // end up holding the values of the last response only.
//     ClassResRecParam.UserClassFeature userClassFeature = new ClassResRecParam.UserClassFeature();
//     BeanUtils.copyProperties(personalizeResourcesResponse, userClassFeature);
//     paramList.add(userClassFeature);
// }
// param.setTraceId(UUID.randomUUID().toString());
// param.setFeatures(paramList);
// ClassResRecResult predict = null;
// try {
//     for (int i = 0; i < 10; i++) {
//         sw.reset().start();
//         predict = tmp.predict(param);
//         System.out.println("模型输出:" + predict + " 耗时:" + sw.stop().elapsed(TimeUnit.MILLISECONDS) + "ms");
//     }
// } catch (Exception e) {
//     e.printStackTrace();
// }
// System.out.println(predict);
// tmp.close();
4.上线流程和范围
上线目标:
1、共性推荐:针对开放版本课堂—开放素材推荐及课件推荐
2、个性推荐:优化课堂个性化备课资源推荐,实现精准推荐
上线范围:
1、共性推荐:针对全学科学段
2、个性推荐:针对初中数学,后续根据业务放开其他学科
5.数据链路
MongoDB:172.31.184.197:27017 paisou
dws_zhkt_resource_profile_di dws_zhkt_user_profile_di
ES索引结构评审
{
"resource_data" : {
"aliases" : { },
"mappings" : {
"properties" : {
"book_code" : {
"type" : "keyword"
},
"book_code_orig" : {
"type" : "keyword"
},
"lag1_resource_id" : {
"type" : "keyword"
},
"lag2_resource_id" : {
"type" : "keyword"
},
"lag3_resource_id" : {
"type" : "keyword"
},
"pdate" : {
"type" : "keyword"
},
"pdate_days" : {
"type" : "keyword"
},
"pdate_mday" : {
"type" : "keyword"
},
"pdate_month" : {
"type" : "keyword"
},
"pdate_week" : {
"type" : "keyword"
},
"pdate_year" : {
"type" : "keyword"
},
"res_author_name" : {
"type" : "keyword"
},
"res_create_time" : {
"type" : "keyword"
},
"res_create_time_days" : {
"type" : "keyword"
},
"res_create_time_mday" : {
"type" : "keyword"
},
"res_create_time_month" : {
"type" : "keyword"
},
"res_create_time_week" : {
"type" : "keyword"
},
"res_create_time_year" : {
"type" : "keyword"
},
"res_duration" : {
"type" : "keyword"
},
"res_edition_name" : {
"type" : "keyword"
},
"res_extension" : {
"type" : "keyword"
},
"res_first_pdate" : {
"type" : "keyword"
},
"res_first_pdate_days" : {
"type" : "keyword"
},
"res_first_pdate_mday" : {
"type" : "keyword"
},
"res_first_pdate_month" : {
"type" : "keyword"
},
"res_first_pdate_week" : {
"type" : "keyword"
},
"res_first_pdate_year" : {
"type" : "keyword"
},
"res_grade_name" : {
"type" : "keyword"
},
"res_last30d_reference_count" : {
"type" : "keyword"
},
"res_last30d_view_count" : {
"type" : "keyword"
},
"res_last7d_reference_count" : {
"type" : "keyword"
},
"res_last7d_view_count" : {
"type" : "keyword"
},
"res_last_pdate" : {
"type" : "keyword"
},
"res_last_pdate_days" : {
"type" : "keyword"
},
"res_last_pdate_mday" : {
"type" : "keyword"
},
"res_last_pdate_month" : {
"type" : "keyword"
},
"res_last_pdate_week" : {
"type" : "keyword"
},
"res_last_pdate_year" : {
"type" : "keyword"
},
"res_phase_name" : {
"type" : "keyword"
},
"res_project_name" : {
"type" : "keyword"
},
"res_publisher_name" : {
"type" : "keyword"
},
"res_quality_label" : {
"type" : "keyword"
},
"res_reference_count" : {
"type" : "keyword"
},
"res_resource_title" : {
"type" : "keyword"
},
"res_size" : {
"type" : "keyword"
},
"res_source_name" : {
"type" : "keyword"
},
"res_stage_name" : {
"type" : "keyword"
},
"res_status" : {
"type" : "keyword"
},
"res_sub_use_type_name" : {
"type" : "keyword"
},
"res_subject_name" : {
"type" : "keyword"
},
"res_tech_type_name" : {
"type" : "keyword"
},
"res_update_time" : {
"type" : "keyword"
},
"res_update_time_days" : {
"type" : "keyword"
},
"res_update_time_mday" : {
"type" : "keyword"
},
"res_update_time_month" : {
"type" : "keyword"
},
"res_update_time_week" : {
"type" : "keyword"
},
"res_update_time_year" : {
"type" : "keyword"
},
"res_uploader_name" : {
"type" : "keyword"
},
"res_use_type_name" : {
"type" : "keyword"
},
"res_view_count" : {
"type" : "keyword"
},
"res_volumn_name" : {
"type" : "keyword"
},
"res_ytd_reference_count" : {
"type" : "keyword"
},
"res_ytd_view_count" : {
"type" : "keyword"
},
"resource_id" : {
"type" : "keyword"
},
"resource_id_orig" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"creation_date" : "1668155178334",
"number_of_shards" : "3",
"number_of_replicas" : "3",
"uuid" : "N6-cud1JRAq2x2NmbUsEmg",
"version" : {
"created" : "7060299"
},
"provided_name" : "resource_data"
}
}
}
}
数据量:107W
{
"user_data" : {
"aliases" : { },
"mappings" : {
"properties" : {
"user_active_day_count" : {
"type" : "keyword"
},
"user_avg_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_friday_active_day_count" : {
"type" : "keyword"
},
"user_id" : {
"type" : "keyword"
},
"user_id_orig" : {
"type" : "keyword"
},
"user_last30d_avg_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_last30d_max_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_last30d_min_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_max_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_min_since_prior_active_day_count" : {
"type" : "keyword"
},
"user_monday_active_day_count" : {
"type" : "keyword"
},
"user_reference_cost_day_count" : {
"type" : "keyword"
},
"user_reference_resource_count" : {
"type" : "keyword"
},
"user_reference_top1_extension" : {
"type" : "keyword"
},
"user_reference_top2_extension" : {
"type" : "keyword"
},
"user_reference_top3_extension" : {
"type" : "keyword"
},
"user_reference_unit_count" : {
"type" : "keyword"
},
"user_saturday_active_day_count" : {
"type" : "keyword"
},
"user_sunday_active_day_count" : {
"type" : "keyword"
},
"user_thursday_active_day_count" : {
"type" : "keyword"
},
"user_tuesday_active_day_count" : {
"type" : "keyword"
},
"user_view_cost_day_count" : {
"type" : "keyword"
},
"user_view_resource_count" : {
"type" : "keyword"
},
"user_view_top1_extension" : {
"type" : "keyword"
},
"user_view_top2_extension" : {
"type" : "keyword"
},
"user_view_top3_extension" : {
"type" : "keyword"
},
"user_view_unit_count" : {
"type" : "keyword"
},
"user_wednesday_active_day_count" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"creation_date" : "1668155178595",
"number_of_shards" : "3",
"number_of_replicas" : "3",
"uuid" : "fxLjpBR3QFaryKY0zohk4Q",
"version" : {
"created" : "7060299"
},
"provided_name" : "user_data"
}
}
}
}
数据量:40w