传统的转化率建模方式:
- y=1 表示用户发生了转化(如购买)
- z=1 表示用户点击了广告

这样做的问题:
ESMM 通过建模两个可观测事件,联合推导出 CVR:
                x
                │
        ┌───────┴───────┐
        │               │
        ▼               ▼
     CTR网络         CTCVR网络
        │               │
        ▼               ▼
    P(z=1|x)      P(y=1,z=1|x)
        │               │
        └───────┬───────┘
                ▼
CVR(x) = P(y=1|x,z=1) = CTCVR(x)/CTR(x)
- x:样本特征
- z ∈ {0,1}:是否点击
- y ∈ {0,1}:是否转化(仅在 z=1 时观察)

Focal Loss 的定义:

$$FL(p_t) = -\alpha_t (1 - p_t)^\gamma \log(p_t)$$
import torch
import torch.nn.functional as F
def focal_loss(inputs, targets, alpha=0.25, gamma=2.0, reduction='mean'):
    """Compute the focal loss FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t).

    The per-sample cross entropy ``ce`` equals ``-log(p_t)``, so ``p_t`` is
    recovered as ``exp(-ce)`` and the loss becomes
    ``alpha * (1 - p_t) ** gamma * ce``.

    Args:
        inputs: Logits passed straight to ``F.cross_entropy``
            (presumably shape [B, C] -- confirm against the caller).
        targets: Targets passed straight to ``F.cross_entropy``
            (presumably class indices of shape [B] -- confirm against the
            caller).
        alpha: Weight multiplied into every sample's loss. The same value is
            used for all classes.
        gamma: Focusing exponent; larger values down-weight samples the
            model already predicts confidently.
        reduction: ``'mean'``, ``'sum'`` or ``'none'`` (``None`` is also
            accepted as "no reduction").

    Returns:
        A scalar tensor for ``'mean'`` / ``'sum'``, otherwise the unreduced
        per-sample loss tensor.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """
    # Validate up front so a typo such as 'avg' fails loudly instead of
    # silently returning an unreduced tensor.
    if reduction not in ('mean', 'sum', 'none', None):
        raise ValueError(
            f"reduction must be 'mean', 'sum' or 'none', got {reduction!r}"
        )

    # Per-sample cross entropy, i.e. -log(p_t).
    ce_loss = F.cross_entropy(inputs, targets, reduction='none')
    # Probability assigned to the target; higher means the model is more
    # confident on that sample.
    pt = torch.exp(-ce_loss)
    loss = alpha * (1 - pt) ** gamma * ce_loss

    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss  # no reduction
Faiss(Facebook AI Similarity Search)是由 Facebook 开发的用于高效相似度搜索和密集向量聚类的库,主要用于大规模向量检索。以下是其典型应用领域:
from typing import List


class Solution:
    """Quickselect and quicksort built on a shared in-place partition."""

    def partition(self, nums, left, right):
        """Partition ``nums[left..right]`` in place around ``nums[right]``.

        After the call every element left of the returned index is
        ``<= pivot`` and every element right of it is ``> pivot``.

        Args:
            nums: Mutable sequence to rearrange.
            left: First index of the range (inclusive).
            right: Last index of the range (inclusive); its value is the pivot.

        Returns:
            The final index of the pivot.
        """
        pivot = nums[right]
        # ``boundary`` is the last index known to hold a value <= pivot.
        boundary = left - 1
        for j in range(left, right):
            if nums[j] <= pivot:
                boundary += 1
                nums[boundary], nums[j] = nums[j], nums[boundary]
        # Drop the pivot just after the "<= pivot" region.
        nums[boundary + 1], nums[right] = nums[right], nums[boundary + 1]
        return boundary + 1

    def topk_split(self, nums, k, left, right):
        """Rearrange ``nums[left..right]`` so index ``k`` holds its sorted value.

        Quickselect: after the call, ``nums[k]`` is the element that would be
        at index ``k`` if the range were sorted ascending (provided
        ``left <= k <= right``). Implemented as a loop rather than recursion
        so already-sorted input cannot exhaust the recursion limit.

        Args:
            nums: Mutable sequence, rearranged in place.
            k: Target index within ``nums``.
            left: First index of the search range (inclusive).
            right: Last index of the search range (inclusive).
        """
        while left < right:
            index = self.partition(nums, left, right)
            if index == k:
                return
            if index < k:
                left = index + 1
            else:
                right = index - 1

    def quicksort(self, nums, left, right):
        """Sort ``nums[left..right]`` ascending in place.

        Recurses only into the smaller partition and loops over the larger
        one, which keeps the recursion depth logarithmic even when the
        partitions are maximally unbalanced (e.g. already-sorted input).

        Args:
            nums: Mutable sequence, sorted in place.
            left: First index of the range (inclusive).
            right: Last index of the range (inclusive).
        """
        while left < right:
            index = self.partition(nums, left, right)
            if index - left < right - index:
                self.quicksort(nums, left, index - 1)
                left = index + 1
            else:
                self.quicksort(nums, index + 1, right)
                right = index - 1

    def findKthLargest(self, nums: List[int], k: int) -> int:
        """Return the k-th largest element of ``nums`` (1-based).

        ``nums`` is rearranged in place as a side effect.

        Args:
            nums: Non-empty list of values.
            k: Rank from the top; ``1`` means the maximum.

        Returns:
            The k-th largest value.

        Raises:
            ValueError: If ``k`` is not in ``1..len(nums)``.
        """
        if not 1 <= k <= len(nums):
            raise ValueError(
                f"k must be between 1 and len(nums)={len(nums)}, got {k}"
            )
        right = len(nums) - 1
        # The partition orders ascending, so the k-th largest element ends up
        # at index n - k.
        target = right - k + 1
        self.topk_split(nums, target, 0, right)
        return nums[target]