2019-08-16工作进展

  1. mvdssm训练集生成

-- Launch a PAI tensorflow140 job that runs train_mv_dssm_v2.py from the uploaded
-- tarball on 30 workers and 10 parameter servers. It is given hs_tmp_250,
-- hs_tmp_251 and hs_tmp_221 as input tables, hs_tmp_211 as output table, and an
-- OSS bucket as checkpoint directory.
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="train_mv_dssm_v2.py"
    -Dcluster='{"worker":{"count":30, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}'
    -Dtables="odps://graph_embedding/tables/hs_tmp_250,odps://graph_embedding/tables/hs_tmp_251,odps://graph_embedding/tables/hs_tmp_221"
    -Doutputs="odps://graph_embedding/tables/hs_tmp_211"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt"
    -DuseSparseClusterSchema=True;

hs_dssm_train_v2_0

  2. 构造训练测试验证集

训练集测试集构造 : se_keyword_mainse_ws,title_mainse_ws,pic_mainse_ws

-- Rebuild hs_tmp_220: every row of hs_dssm_train_v2_0 extended with two columns,
--   se_keyword_mainse_ws = hs_tmp_202.emb matched on query_id = id
--   title_mainse_ws      = hs_tmp_203.emb matched on item_id  = id
-- Both joins are inner joins, so rows without a match in either lookup table
-- are dropped.
-- The `yes` line is the reply to the interactive drop-table confirmation prompt.
drop table hs_tmp_220;
yes
create table hs_tmp_220 as
select
    c.*,
    d.emb as title_mainse_ws
from (
    select
        a.*,
        b.emb as se_keyword_mainse_ws
    from hs_dssm_train_v2_0 a
    join hs_tmp_202 b
        on a.query_id == b.id
) c
join hs_tmp_203 d
    on c.item_id == d.id;

-- Rebuild hs_tmp_223: every row of hs_dssm_train_v2_0 plus the pic_ws column of
-- hs_tmp_214, matched on item_id (inner join).
-- The `yes` line is the reply to the interactive drop-table confirmation prompt.
drop table hs_tmp_223;
yes
create table hs_tmp_223 as
select
    train.*,
    pic.pic_ws
from hs_dssm_train_v2_0 train
join hs_tmp_214 pic
    on train.item_id == pic.item_id;

-- Rebuild hs_tmp_224: the distinct combinations of
-- (se_keyword_mainse_ws, title_mainse_ws, label, pic_mainse_ws), taking the
-- first three columns from hs_tmp_220 and pic_ws (renamed pic_mainse_ws) from
-- hs_tmp_223, matched on item_id.
-- The `yes` line is the reply to the interactive drop-table confirmation prompt.
drop table hs_tmp_224;
yes
create table hs_tmp_224 as
select distinct
    txt.se_keyword_mainse_ws,
    txt.title_mainse_ws,
    txt.label,
    img.pic_ws as pic_mainse_ws
from hs_tmp_220 txt
join hs_tmp_223 img
    on txt.item_id == img.item_id;

验证集构造 : query_id, query_ws, video_id, video_ws, pic_ws

-- Rebuild hs_tmp_221 with the distinct rows of
-- (query_id, video_id, query_ws, video_ws, pic_ws):
--   query_ws = hs_tmp_204.emb    matched on hs_tmp_157.query_id = id
--   video_ws = hs_tmp_205.emb    matched on hs_tmp_157.title_id = id
--   pic_ws   = hs_tmp_217.pic_ws matched on hs_tmp_157.title_id = item_id
-- title_id is exposed as video_id. All joins are inner joins, so rows of
-- hs_tmp_157 without a match in any of the three lookup tables are dropped.
-- The `yes` line is the reply to the interactive drop-table confirmation prompt.
drop table hs_tmp_221;
yes
create table hs_tmp_221 as
select distinct
    e.query_id,
    e.title_id as video_id,
    e.query_ws,
    e.video_ws,
    f.pic_ws
from (
    select
        c.*,
        d.emb as video_ws
    from (
        select
            a.*,
            b.emb as query_ws
        from hs_tmp_157 a
        join hs_tmp_204 b
            on a.query_id == b.id
    ) c
    join hs_tmp_205 d
        on c.title_id == d.id
) e
join hs_tmp_217 f
    on e.title_id == f.item_id;

train & inference

-- Copy at most 1,000,000 rows of hs_tmp_224 into the new table hs_tmp_231.
-- There is no ORDER BY, so this statement does not fix which rows are taken.
-- NOTE(review): hs_tmp_231 is passed as the second input table of the training
-- jobs in this note; presumably it serves as the test split. Confirm.
create table hs_tmp_231 as select * from hs_tmp_224 limit 1000000;
-- The six statements below each overwrite hs_tmp_224 with its own current
-- contents, so the set of rows is the same before and after.
-- NOTE(review): the purpose is not visible here. This may be a way to rewrite
-- the table's underlying files, or `insert into` (row duplication) may have
-- been intended. Confirm before re-running.
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
insert overwrite table hs_tmp_224 select * from hs_tmp_224;
se_keyword_mainse_ws,title_mainse_ws, pic_mainse_ws, label
query_id, query_ws, video_id, video_ws, pic_ws

-- Launch a PAI tensorflow140 job that runs train_mv_dssm_v2.py on 10 workers and
-- 10 parameter servers. It is given hs_tmp_224, hs_tmp_231 and hs_tmp_221 as
-- input tables, hs_tmp_211 as output table, and an OSS bucket as checkpoint
-- directory.
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="train_mv_dssm_v2.py"
    -Dcluster='{"worker":{"count":10, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}'
    -Dtables="odps://graph_embedding/tables/hs_tmp_224,odps://graph_embedding/tables/hs_tmp_231,odps://graph_embedding/tables/hs_tmp_221"
    -Doutputs="odps://graph_embedding/tables/hs_tmp_211"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt"
    -DuseSparseClusterSchema=True;

-- The same job with the worker count raised from 10 to 50; every other argument
-- is identical to the command directly above.
pai -name tensorflow140
    -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz"
    -DentryFile="train_mv_dssm_v2.py"
    -Dcluster='{"worker":{"count":50, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}'
    -Dtables="odps://graph_embedding/tables/hs_tmp_224,odps://graph_embedding/tables/hs_tmp_231,odps://graph_embedding/tables/hs_tmp_221"
    -Doutputs="odps://graph_embedding/tables/hs_tmp_211"
    -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com"
    -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video_4e_5.ckpt"
    -DuseSparseClusterSchema=True;

20190816124945153g7e6vyvj2

hs_tmp_252 : | query_id | video_id | score | active_view |
hs_tmp_215 : | item_id | title | pic_url |
hs_dssm_dic_query_inf_1 : | id | words_mainse_ids | query |

-- Rebuild hs_tmp_248: every row of hs_tmp_211 (the output table of the training
-- jobs in this note) extended with readable fields,
--   title, pic_url from hs_tmp_215              matched on video_id = item_id
--   query          from hs_dssm_dic_query_inf_1 matched on query_id = id
-- Both joins are inner joins, so rows without a match are dropped.
-- The `yes` line is the reply to the interactive drop-table confirmation prompt.
drop table hs_tmp_248;
yes
create table hs_tmp_248 as
select
    c.*,
    d.query
from (
    select
        a.*,
        b.title,
        b.pic_url
    from hs_tmp_211 a
    join hs_tmp_215 b
        on a.video_id == b.item_id
) c
join hs_dssm_dic_query_inf_1 d
    on c.query_id == d.id;

你可能感兴趣的:(2019-08-16工作进展)