个性推荐升级

1.推荐系统上线技术方案

出入参设计

/**
 * Request parameters for the personalized preparation-resource recommendation.
 *
 * <p>Carries the textbook/chapter context, the resource-type filters, the
 * requesting user's identity and location, and the paging settings.
 */
public class PreparationResourcesPersonalityRequest{

    /**
     * Chapter (table-of-contents) code.
     */
    private String chapterCode;

    /**
     * Textbook code.
     */
    private String testBookCode;

    /**
     * Subject code. Must not be null.
     */
    @NotNull(message = "学科编码不能为null")
    private String subjectCode;

    /**
     * School phase code. Must not be null.
     */
    @NotNull(message = "学段编码不能为null")
    private String phaseCode;

    /**
     * Grade.
     */
    private String grade;


    /**
     * Resource type: first-level resource directory.
     */
    private String firstApplicationDirectory;

    /**
     * Resource type: technology application type.
     */
    private String technologyApplicationType;


    /**
     * User id.
     */
    private String userId;

    /**
     * Id of the user's school.
     */
    private String userSchoolId;

    /**
     * Province the user is located in.
     */
    private String userProvinceCode;

    /**
     * City the user is located in.
     */
    private String userCityCode;

    /**
     * District the user is located in.
     */
    private String userAreaCode;


    /**
     * De-duplication position.
     * NOTE(review): the exact semantics are not visible in this excerpt; confirm with the caller.
     */
    private Integer deDuplicationIndex;


    /**
     * Current page number. Defaults to 1.
     */
    private Integer currentPage = 1;


    /**
     * Number of items per page. Defaults to 20.
     */
    private Integer pageSize = 20;

    /**
     * Resource filter. Non-empty: recommend resources whose usedRange intersects with it;
     * empty: recommend resources whose usedRange is "01".
     *
     * @deprecated no longer used. NOTE(review): {@link #resources} looks like the
     *     replacement filter; confirm before relying on it.
     */
    @Deprecated
    private List<String> resKey;

    /**
     * Resource filter.
     */
    private List<PreparationResources> resources;
}
/**
 * Input parameter of the class-resource recommendation model: a trace id plus
 * a list of feature rows.
 *
 * <p>NOTE(review): no accessors or Lombok annotation are declared on this outer
 * class in this excerpt, only on {@link UserClassFeature}; confirm how
 * {@code traceId} and {@code features} are populated.
 */
public class ClassResRecParam {

    /**
     * Trace id. Must not be empty.
     * NOTE(review): the constraint is stated here only; no validation annotation enforces it.
     */
    private String traceId;

    /**
     * Feature list. Must not be empty.
     * NOTE(review): the constraint is stated here only; no validation annotation enforces it.
     */
    private List<UserClassFeature> features;

    /**
     * One row of model features, grouped below into statistical features,
     * name-type features and resource ids.
     */
    @Data
    public static class UserClassFeature {
        /* Statistical features */
        private float resViewCount;
        private float resLast7DViewCount;
        private float resLast30DViewCount;
        private float resYtdViewCount;
        private float resReferenceCount;
        private float resLast7DReferenceCount;
        private float resLast30DReferenceCount;
        private float resYtdReferenceCount;
        private float userres7DViewCount;
        private float userres7DReferenceCount;
        private float userres7DEbookViewCount;
        private float userres7DEbookReferenceCount;
        private float userres30DViewCount;
        private float userres30DReferenceCount;
        private float userres30DEbookViewCount;
        private float userres30DEbookReferenceCount;
        private float userViewUnitCount;
        private float userViewResourceCount;
        private float userReferenceUnitCount;
        private float userReferenceResourceCount;
        private float userActiveDayCount;
        private float userMondayActiveDayCount;
        private float userTuesdayActiveDayCount;
        private float userWednesdayActiveDayCount;
        private float userThursdayActiveDayCount;
        private float userFridayActiveDayCount;
        private float userSaturdayActiveDayCount;
        private float userSundayActiveDayCount;
        private float userLast30DMinSincePriorActiveDayCount;
        private float userLast30DMaxSincePriorActiveDayCount;
        private float userLast30DAvgSincePriorActiveDayCount;
        private float userMinSincePriorActiveDayCount;
        private float userMaxSincePriorActiveDayCount;
        private float userAvgSincePriorActiveDayCount;
        private float userViewCostDayCount;
        private float userReferenceCostDayCount;
        private float resSize;
        private float resDuration;
        private float pdateDays;
        private float resCreateTimeDays;
        private float resUpdateTimeDays;
        private float resFirstPdateDays;
        private float resLastPdateDays;

        /*
         * Name-type features.
         * NOTE(review): these are declared as long although they are named after
         * textual attributes; presumably they hold pre-encoded ids. Confirm the encoding.
         */
        private long userId;
        private long classId;
        private long bookCode;
        private long bookUnit;
        private long moduleId;
        private long provinceName;
        private long cityName;
        private long districtName;
        private long schoolName;
        private long campusName;
        private long appVersion;
        private long resResourceTitle;
        private long resPublisherName;
        private long resVolumnName;
        private long resStageName;
        private long resQualityLabel;
        private long resProjectName;
        private long resSourceName;
        private long resPhaseName;
        private long resGradeName;
        private long resSubjectName;
        private long resEditionName;
        private long resStatus;
        private long resExtension;
        private long resAuthorName;
        private long resUploaderName;
        private long resTechTypeName;
        private long resUseTypeName;
        private long resSubUseTypeName;
        private long userViewTop3Extension;
        private long userReferenceTop3Extension;
        private long userViewTop1Extension;
        private long userViewTop2Extension;
        private long userReferenceTop1Extension;
        private long userReferenceTop2Extension;
        private long pdateYear;
        private long pdateMonth;
        private long pdateWeek;
        private long pdateMday;
        private long resCreateTimeYear;
        private long resCreateTimeMonth;
        private long resCreateTimeWeek;
        private long resCreateTimeMday;
        private long resUpdateTimeYear;
        private long resUpdateTimeMonth;
        private long resUpdateTimeWeek;
        private long resUpdateTimeMday;
        private long resFirstPdateYear;
        private long resFirstPdateMonth;
        private long resFirstPdateWeek;
        private long resFirstPdateMday;
        private long resLastPdateYear;
        private long resLastPdateMonth;
        private long resLastPdateWeek;
        private long resLastPdateMday;

        // Resource ids
        private long resourceId;
        private long lag1ResourceId;
        private long lag2ResourceId;
        private long lag3ResourceId;
    }
}

流程:

注意:1.出入参一致性同步

2. Python脚本转成Java (预计2天)

  • 提前预处理
  • 使用scala

3. 脚本耗时

流程注意事项:

1.用户冷启动问题

2.线上数据查询方案验证

业务性能指标: 1.5s 以内

查出40个id :1s内

查询耗时:1s内    当前耗时:20s

策略:

  • 剥离交互信息
  • 剥离left join   变成点查
  • 非关系型数据库
  • 缓存策略:用户请求

时间点:今天

优化方案:

  1. 先过滤数据  不要有join group sum

结果:

t2表查询耗时:平均0.9s   数据量:79W

t3表查询耗时:平均0.23s  数据量:36W

t4表查询耗时:平均1.5s  数据量:190W

3.缓存方案及验证

PUT test_cache
{
  "mappings": {
    "properties": {
      "user_id": {
        "type": "keyword"
      },
      "province_id": {
        "type": "keyword"
      },
      "province_name": {
        "type": "keyword"
      },
      "city_id": {
        "type": "keyword"
      },
      "city_name": {
        "type": "keyword"
      },
      "district_id": {
        "type": "keyword"
      },
      "district_name": {
        "type": "keyword"
      },
      "school_id": {
        "type": "keyword"
      },
      "school_name": {
        "type": "keyword"
      },
      "campus_id": {
        "type": "keyword"
      },
      "campus_name": {
        "type": "keyword"
      },
      "book_code": {
        "type": "keyword"
      },
      "unit1": {
        "type": "keyword"
      },
      "unit2": {
        "type": "keyword"
      },
      "unit3": {
        "type": "keyword"
      },
      "unit4": {
        "type": "keyword"
      },
      "book_name": {
        "type": "keyword"
      },
      "resource_id": {
        "type": "keyword"
      },
      "module_id": {
        "type": "keyword"
      },
      "app_version": {
        "type": "keyword"
      },
      "lag1_resource_id": {
        "type": "keyword"
      },
      "lag2_resource_id": {
        "type": "keyword"
      },
      "lag3_resource_id": {
        "type": "keyword"
      },
      "book_unit": {
        "type": "keyword"
      },
      "res_resource_title": {
        "type": "keyword"
      },
      "res_publisher_code": {
        "type": "keyword"
      },
      "res_publisher_name": {
        "type": "keyword"
      },
      "res_volumn_code": {
        "type": "keyword"
      },
      "res_volumn_name": {
        "type": "keyword"
      },
      "res_stage_code": {
        "type": "keyword"
      },
      "res_stage_name": {
        "type": "keyword"
      },
      "res_create_time": {
        "type": "keyword"
      },
      "res_update_time": {
        "type": "keyword"
      },
      "res_type_code": {
        "type": "keyword"
      },
      "res_quality_label": {
        "type": "keyword"
      },
      "res_project_code": {
        "type": "keyword"
      },
      "res_project_name": {
        "type": "keyword"
      },
      "res_source_code": {
        "type": "keyword"
      },
      "res_source_name": {
        "type": "keyword"
      },
      "res_phase_code": {
        "type": "keyword"
      },
      "res_phase_name": {
        "type": "keyword"
      },
      "res_grade_code": {
        "type": "keyword"
      },
      "res_grade_name": {
        "type": "keyword"
      },
      "res_subject_code": {
        "type": "keyword"
      },
      "res_subject_name": {
        "type": "keyword"
      },
      "res_edition_code": {
        "type": "keyword"
      },
      "res_edition_name": {
        "type": "keyword"
      },
      "res_status": {
        "type": "keyword"
      },
      "res_file_name": {
        "type": "keyword"
      },
      "res_extension": {
        "type": "keyword"
      },
      "res_author_id": {
        "type": "keyword"
      },
      "res_author_name": {
        "type": "keyword"
      },
      "res_uploader_id": {
        "type": "keyword"
      },
      "res_uploader_name": {
        "type": "keyword"
      },
      "res_tech_type_code": {
        "type": "keyword"
      },
      "res_tech_type_name": {
        "type": "keyword"
      },
      "res_use_type_code": {
        "type": "keyword"
      },
      "res_use_type_name": {
        "type": "keyword"
      },
      "res_sub_use_type_code": {
        "type": "keyword"
      },
      "res_sub_use_type_name": {
        "type": "keyword"
      },
      "res_size": {
        "type": "keyword"
      },
      "res_duration": {
        "type": "keyword"
      },
      "res_first_pdate": {
        "type": "keyword"
      },
      "res_last_pdate": {
        "type": "keyword"
      },
      "res_view_count": {
        "type": "keyword"
      },
      "res_last7d_view_count": {
        "type": "keyword"
      },
      "res_last30d_view_count": {
        "type": "keyword"
      },
      "res_ytd_view_count": {
        "type": "keyword"
      },
      "res_reference_count": {
        "type": "keyword"
      },
      "res_last7d_reference_count": {
        "type": "keyword"
      },
      "res_last30d_reference_count": {
        "type": "keyword"
      },
      "res_ytd_reference_count": {
        "type": "keyword"
      },
      "userres7d_view_count": {
        "type": "keyword"
      },
      "userres7d_reference_count": {
        "type": "keyword"
      },
      "userres7d_ebook_view_count": {
        "type": "keyword"
      },
      "userres7d_ebook_reference_count": {
        "type": "keyword"
      },
      "userres30d_view_count": {
        "type": "keyword"
      },
      "userres30d_reference_count": {
        "type": "keyword"
      },
      "userres30d_ebook_view_count": {
        "type": "keyword"
      },
      "userres30d_ebook_reference_count": {
        "type": "keyword"
      },
      "user_view_unit_count": {
        "type": "keyword"
      },
      "user_view_cost_day_count": {
        "type": "keyword"
      },
      "user_view_resource_count": {
        "type": "keyword"
      },
      "user_view_top1_extension": {
        "type": "keyword"
      },
      "user_view_top2_extension": {
        "type": "keyword"
      },
      "user_view_top3_extension": {
        "type": "keyword"
      },
      "user_reference_unit_count": {
        "type": "keyword"
      },
      "user_reference_cost_day_count": {
        "type": "keyword"
      },
      "user_reference_resource_count": {
        "type": "keyword"
      },
      "user_reference_top1_extension": {
        "type": "keyword"
      },
      "user_reference_top2_extension": {
        "type": "keyword"
      },
      "user_reference_top3_extension": {
        "type": "keyword"
      },
      "user_active_day_count": {
        "type": "keyword"
      },
      "user_monday_active_day_count": {
        "type": "keyword"
      },
      "user_tuesday_active_day_count": {
        "type": "keyword"
      },
      "user_wednesday_active_day_count": {
        "type": "keyword"
      },
      "user_thursday_active_day_count": {
        "type": "keyword"
      },
      "user_friday_active_day_count": {
        "type": "keyword"
      },
      "user_saturday_active_day_count": {
        "type": "keyword"
      },
      "user_sunday_active_day_count": {
        "type": "keyword"
      },
      "user_last30d_min_since_prior_active_day_count": {
        "type": "keyword"
      },
      "user_last30d_max_since_prior_active_day_count": {
        "type": "keyword"
      },
      "user_last30d_avg_since_prior_active_day_count": {
        "type": "keyword"
      },
      "user_min_since_prior_active_day_count": {
        "type": "keyword"
      },
      "user_max_since_prior_active_day_count": {
        "type": "keyword"
      },
      "user_avg_since_prior_active_day_count": {
        "type": "keyword"
      }
    }
  }
}

测试:使用es   80w数据 40个资源 es terms耗时6ms

GET act_di3/_search
{
  "query": {
    "terms": {
      "resource_id": [
        "000000a4207c4662808f602070c95d07",
"00000378a3f94586ac8a45884108373f",
"0000045f2a034744a0e58be41f03415f",
"000004f6fda14f5190a5c54a132aa22e",
"0000050dbd81473187a73c3b41caa415",
"000005ecfea74f10b8d98eff31bd2905",
"00000727387b49acb3b09253cf45d96c",
"0000089f30bb491baf2a38f577dab87a",
"000009a387014ab38f872fa36bacd79a",
"00000c459dc94476a0220ddd13d35158",
"00000cfe9e4d4ad6863aeaa5c6808372",
"00000f162f4644f3be0e11d1145b612d",
"00000ff923624941a03e25d76c33202b",
"0000117eb77a4311a69684d181ed2fc3",
"00001463a0bb41d990927140cc1d506e",
"000014bf0eb34db9948ef6a45d80b29e",
"0000194250844192a92af9bad59bd561",
"00001954282a43f3bea28cde34e48b2a",
"00001c34d05d4fc8b662bc50085650df",
"00001f77c1ed4432a04c8eb11fcfaaa6",
"00001fd977af4c8e915f62baaf468c92",
"000020e4c84d4687aef906aff4bbf9cc",
"000021b025bf11e899f0a0d37a6353f2",
"0000235ab8294b7c9ce90566ccd41dc5",
"000026344a584a45845efea5e39085e7",
"0000273758bc46b293b6c12e9bd482e0",
"00002768fe0442e784ffb479f94265c2",
"000027bf6f4b46b986026c9b7d7daecf",
"0000284a01a04c129c6dcec0a7443f9e",
"00002a281d9444e6a9f0b8e8170b6975",
"00002ad7cb4c4b299971cc0b393579c5",
"00002b875bb6425184ef9a012ff69947",
"00002bec27d048aa9df9e4c42c8b0fec",
"00002caf8e7e4871b121e887ef1df792",
"00002e4be7744ce6a61fdf344f82bd08",
"00002e8d85f24f78b1136c0e744e4ed8",
"0000314fdc7e494291847244ce12fefe",
"000031ae86734541a842beb6b87066db",
"0000332ec0124ef9adfb8c008543838e",
"00003444d60b4ae8815fa48dc1adc6af"
      ]
    }
  }
}

缓存表验证:

目前数据只插入30W,查询40个资源id,耗时7ms

需求:需要将数据(800W)插入ES

时间点:今天之前

缓存表覆盖度约56%情况下:

数据量:8284000

效果 :1074ms

缓存表覆盖度80+% 数据量 效果

ES极限

ES分开点查: 多个索引- MySql数据插入到Es:

//        // Benchmark sample: open the model, build its input, then time 10 predict() calls.
//        Stopwatch sw = Stopwatch.createStarted();
//        IClassResRec tmp = ClassResRecRawImpl.open();
//        sw.stop();
//        // println does not expand "{}" placeholders, so the message is concatenated explicitly.
//        System.out.println("初始化耗时:" + sw.elapsed(TimeUnit.MILLISECONDS) + "ms");

//        // Convert the online computation results into the model input.
//        ClassResRecParam param = new ClassResRecParam();
//        ArrayList<ClassResRecParam.UserClassFeature> paramList = new ArrayList<>();
//        for (PersonalizeResourcesResponse personalizeResourcesResponse : responseList) {
//            // Create one feature object per response. Reusing a single instance would make
//            // every list entry alias the same object, holding only the last response's values.
//            ClassResRecParam.UserClassFeature userClassFeature = new ClassResRecParam.UserClassFeature();
//            BeanUtils.copyProperties(personalizeResourcesResponse, userClassFeature);
//            paramList.add(userClassFeature);
//        }
//        param.setTraceId(UUID.randomUUID().toString());
//        param.setFeatures(paramList);
//        ClassResRecResult predict = null;
//        try {
//            for (int i = 0; i < 10; i++) {
//                sw.reset().start();
//                predict = tmp.predict(param);
//                long elapsedMs = sw.stop().elapsed(TimeUnit.MILLISECONDS);
//                System.out.println("模型输出:" + predict + " 耗时:" + elapsedMs + "ms");
//            }
//        } catch (Exception e) {
//            e.printStackTrace();
//        }
//        System.out.println(predict);
//        tmp.close();

4.上线流程和范围

上线目标:

1、共性推荐:针对开放版本课堂—开放素材推荐及课件推荐

2、个性推荐:优化课堂个性化备课资源推荐,实现精准推荐

上线范围:

1、共性推荐:针对全学科学段

2、个性推荐:针对初中数学,后续根据业务放开其他学科

5.数据链路

Mongo:172.31.184.197:27017 paisou

dws_zhkt_resource_profile_di dws_zhkt_user_profile_di

ES索引结构评审

{
  "resource_data" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "book_code" : {
          "type" : "keyword"
        },
        "book_code_orig" : {
          "type" : "keyword"
        },
        "lag1_resource_id" : {
          "type" : "keyword"
        },
        "lag2_resource_id" : {
          "type" : "keyword"
        },
        "lag3_resource_id" : {
          "type" : "keyword"
        },
        "pdate" : {
          "type" : "keyword"
        },
        "pdate_days" : {
          "type" : "keyword"
        },
        "pdate_mday" : {
          "type" : "keyword"
        },
        "pdate_month" : {
          "type" : "keyword"
        },
        "pdate_week" : {
          "type" : "keyword"
        },
        "pdate_year" : {
          "type" : "keyword"
        },
        "res_author_name" : {
          "type" : "keyword"
        },
        "res_create_time" : {
          "type" : "keyword"
        },
        "res_create_time_days" : {
          "type" : "keyword"
        },
        "res_create_time_mday" : {
          "type" : "keyword"
        },
        "res_create_time_month" : {
          "type" : "keyword"
        },
        "res_create_time_week" : {
          "type" : "keyword"
        },
        "res_create_time_year" : {
          "type" : "keyword"
        },
        "res_duration" : {
          "type" : "keyword"
        },
        "res_edition_name" : {
          "type" : "keyword"
        },
        "res_extension" : {
          "type" : "keyword"
        },
        "res_first_pdate" : {
          "type" : "keyword"
        },
        "res_first_pdate_days" : {
          "type" : "keyword"
        },
        "res_first_pdate_mday" : {
          "type" : "keyword"
        },
        "res_first_pdate_month" : {
          "type" : "keyword"
        },
        "res_first_pdate_week" : {
          "type" : "keyword"
        },
        "res_first_pdate_year" : {
          "type" : "keyword"
        },
        "res_grade_name" : {
          "type" : "keyword"
        },
        "res_last30d_reference_count" : {
          "type" : "keyword"
        },
        "res_last30d_view_count" : {
          "type" : "keyword"
        },
        "res_last7d_reference_count" : {
          "type" : "keyword"
        },
        "res_last7d_view_count" : {
          "type" : "keyword"
        },
        "res_last_pdate" : {
          "type" : "keyword"
        },
        "res_last_pdate_days" : {
          "type" : "keyword"
        },
        "res_last_pdate_mday" : {
          "type" : "keyword"
        },
        "res_last_pdate_month" : {
          "type" : "keyword"
        },
        "res_last_pdate_week" : {
          "type" : "keyword"
        },
        "res_last_pdate_year" : {
          "type" : "keyword"
        },
        "res_phase_name" : {
          "type" : "keyword"
        },
        "res_project_name" : {
          "type" : "keyword"
        },
        "res_publisher_name" : {
          "type" : "keyword"
        },
        "res_quality_label" : {
          "type" : "keyword"
        },
        "res_reference_count" : {
          "type" : "keyword"
        },
        "res_resource_title" : {
          "type" : "keyword"
        },
        "res_size" : {
          "type" : "keyword"
        },
        "res_source_name" : {
          "type" : "keyword"
        },
        "res_stage_name" : {
          "type" : "keyword"
        },
        "res_status" : {
          "type" : "keyword"
        },
        "res_sub_use_type_name" : {
          "type" : "keyword"
        },
        "res_subject_name" : {
          "type" : "keyword"
        },
        "res_tech_type_name" : {
          "type" : "keyword"
        },
        "res_update_time" : {
          "type" : "keyword"
        },
        "res_update_time_days" : {
          "type" : "keyword"
        },
        "res_update_time_mday" : {
          "type" : "keyword"
        },
        "res_update_time_month" : {
          "type" : "keyword"
        },
        "res_update_time_week" : {
          "type" : "keyword"
        },
        "res_update_time_year" : {
          "type" : "keyword"
        },
        "res_uploader_name" : {
          "type" : "keyword"
        },
        "res_use_type_name" : {
          "type" : "keyword"
        },
        "res_view_count" : {
          "type" : "keyword"
        },
        "res_volumn_name" : {
          "type" : "keyword"
        },
        "res_ytd_reference_count" : {
          "type" : "keyword"
        },
        "res_ytd_view_count" : {
          "type" : "keyword"
        },
        "resource_id" : {
          "type" : "keyword"
        },
        "resource_id_orig" : {
          "type" : "keyword"
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1668155178334",
        "number_of_shards" : "3",
        "number_of_replicas" : "3",
        "uuid" : "N6-cud1JRAq2x2NmbUsEmg",
        "version" : {
          "created" : "7060299"
        },
        "provided_name" : "resource_data"
      }
    }
  }
}

数据量:107W

{
  "user_data" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "user_active_day_count" : {
          "type" : "keyword"
        },
        "user_avg_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_friday_active_day_count" : {
          "type" : "keyword"
        },
        "user_id" : {
          "type" : "keyword"
        },
        "user_id_orig" : {
          "type" : "keyword"
        },
        "user_last30d_avg_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_last30d_max_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_last30d_min_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_max_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_min_since_prior_active_day_count" : {
          "type" : "keyword"
        },
        "user_monday_active_day_count" : {
          "type" : "keyword"
        },
        "user_reference_cost_day_count" : {
          "type" : "keyword"
        },
        "user_reference_resource_count" : {
          "type" : "keyword"
        },
        "user_reference_top1_extension" : {
          "type" : "keyword"
        },
        "user_reference_top2_extension" : {
          "type" : "keyword"
        },
        "user_reference_top3_extension" : {
          "type" : "keyword"
        },
        "user_reference_unit_count" : {
          "type" : "keyword"
        },
        "user_saturday_active_day_count" : {
          "type" : "keyword"
        },
        "user_sunday_active_day_count" : {
          "type" : "keyword"
        },
        "user_thursday_active_day_count" : {
          "type" : "keyword"
        },
        "user_tuesday_active_day_count" : {
          "type" : "keyword"
        },
        "user_view_cost_day_count" : {
          "type" : "keyword"
        },
        "user_view_resource_count" : {
          "type" : "keyword"
        },
        "user_view_top1_extension" : {
          "type" : "keyword"
        },
        "user_view_top2_extension" : {
          "type" : "keyword"
        },
        "user_view_top3_extension" : {
          "type" : "keyword"
        },
        "user_view_unit_count" : {
          "type" : "keyword"
        },
        "user_wednesday_active_day_count" : {
          "type" : "keyword"
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1668155178595",
        "number_of_shards" : "3",
        "number_of_replicas" : "3",
        "uuid" : "fxLjpBR3QFaryKY0zohk4Q",
        "version" : {
          "created" : "7060299"
        },
        "provided_name" : "user_data"
      }
    }
  }
}

数据量:40w

发表评论

您的邮箱地址不会被公开。 必填项已用 * 标注

滚动至顶部