
[arxiv] KGLens: Towards Efficient and Effective Knowledge Probing of Large Language Models with Knowledge Graphs

Source

Authors: Shangshang Zheng, He Bai, Yizhe Zhang, Yi Su, Xiaochuan Niu, Navdeep Jaitly

Abstract: Large Language Models (LLMs) might hallucinate facts, while curated Knowledge Graphs (KGs) are typically factually reliable, especially with domain-specific knowledge. Measuring the alignment between KGs and LLMs can effectively probe the factualness and identify the knowledge blind spots of LLMs. However, verifying LLMs over extensive KGs can be expensive. In this paper, we present KGLens, a Thompson-sampling-inspired framework aimed at effectively and efficiently measuring the alignment between KGs and LLMs. KGLens features a graph-guided question generator for converting KGs into natural language, along with a carefully designed importance sampling strategy based on a parameterized KG structure to expedite KG traversal. Our simulation experiment compares the brute force method with KGLens under six different sampling methods, demonstrating that our approach achieves superior probing efficiency. Leveraging KGLens, we conducted in-depth analyses of the factual accuracy of ten LLMs across three large domain-specific KGs from Wikidata, comprising over 19K edges, 700 relations, and 21K entities. Human evaluation results indicate that KGLens can assess LLMs with a level of accuracy nearly equivalent to that of human annotators, achieving a 95.7% accuracy rate.

URL: https://arxiv.org/abs/2312.11539#

Conversation

▲ Please explain the paper to a reader with no relevant technical or research background; be clear and concise.

This paper explores how to effectively test the factual knowledge of large language models (LLMs), which are powerful AI systems that can generate human-like text. LLMs are great at writing, translating, and answering questions, but they can sometimes make up facts or get things wrong.

The authors propose a new method called KGLens to test LLMs using knowledge graphs (KGs). KGs are like giant databases that store facts about the world in a structured way. Think of them as a network of interconnected facts, where each fact is a relationship between two things.

KGLens works by converting facts from a KG into natural language questions and asking the LLM to answer them. The authors use a clever technique called Thompson sampling to focus on the facts that the LLM is most likely to get wrong, making the testing process more efficient.

The paper shows that KGLens can accurately assess the factual knowledge of LLMs, identifying their strengths and weaknesses. It also demonstrates that KGLens can be used to analyze the knowledge of LLMs across different domains, such as movies, sports, and countries.

The authors believe that KGLens is a valuable tool for researchers and developers who want to build more reliable and factual AI systems. By understanding the limitations of LLMs, we can work towards creating AI that is more trustworthy and helpful.

Data Source Content

# KGLens: Towards Efficient and Effective Knowledge Probing of Large Language Models with Knowledge Graphs

Shangshang Zheng, He Bai*, Yizhe Zhang, Yi Su,
Xiaochuan Niu, Navdeep Jaitly
Apple
{dan.z, hbai22, yizhe_zhang, yi_su, xniu, njaitly}@apple.com
*Equal contribution.

Abstract

Large Language Models (LLMs) might hallucinate facts, while curated Knowledge Graphs (KGs) are typically factually reliable, especially with domain-specific knowledge. Measuring the alignment between KGs and LLMs can effectively probe the factualness and identify the knowledge blind spots of LLMs. However, verifying LLMs over extensive KGs can be expensive. In this paper, we present KGLens, a Thompson-sampling-inspired framework aimed at effectively and efficiently measuring the alignment between KGs and LLMs. KGLens features a graph-guided question generator for converting KGs into natural language, along with a carefully designed importance sampling strategy based on a parameterized KG structure to expedite KG traversal. Our simulation experiment compares the brute force method with KGLens under six different sampling methods, demonstrating that our approach achieves superior probing efficiency. Leveraging KGLens, we conducted in-depth analyses of the factual accuracy of ten LLMs across three large domain-specific KGs from Wikidata, comprising over 19K edges, 700 relations, and 21K entities. Human evaluation results indicate that KGLens can assess LLMs with a level of accuracy nearly equivalent to that of human annotators, achieving a 95.7% accuracy rate.

1 Introduction

The factualness of Large Language Models (LLMs) is crucial for their reliability and utility in various applications. Nonetheless, studies have shown that LLMs can produce information that is nonfactual, hallucinated, or outdated Perez et al. (2022); Ji et al. (2023); Lee et al. (2022); Wang et al. (2021).

To evaluate the factualness of LLMs, fact-checking Thorne et al. (2018); Augenstein et al. (2023) and fact-QA Petroni et al. (2020); Press et al. (2022); Dhingra et al. (2022) approaches are commonly used, but several challenges persist. For fact-checking, distinguishing faithful from unfaithful statements is different from evaluating the factualness of the generation. For fact-QA, scaling up the evaluation data is challenging due to the expensive nature of the annotation process. For both approaches, it is hard to exclude their data from the web-crawled pretraining corpus Deng et al. (2023) to ensure the fairness of the evaluation. Last but not least, LLMs may respond differently to the same fact when the query is phrased in different forms, a challenge that existing fact-checking and fact-QA datasets are not equipped to handle.

In contrast, probing LLMs knowledge by transforming knowledge graph (KG) data into natural language questions addresses these limitations. First, KGs are inherently scalable, factually reliable, and tailored to specific domains. This scalability allows for extensive and efficient evaluation of LLMs across various domains. Second, question generation (QG) from KGs can be automated, enabling the rapid and large-scale creation of evaluation datasets. Moreover, generating various questions from the same set of facts allows us to assess the robustness of LLMs to different phrasings.

However, there are several challenges for QG-based KG evaluation. The first is efficiency. Existing knowledge probing methods Dong et al. (2023); Wang et al. (2023), which mainly use a brute-force approach that exhaustively evaluates all KG edges, are computationally expensive, time-consuming, and non-scalable. The second challenge concerns the transformation of ambiguous KG triplets into natural language queries. Given a subject and predicate, the correct object may not be unique, so the answer to the question is ambiguous. Furthermore, some subjects may lack specificity, resulting in confusing questions. Examples illustrating these issues are shown in Table 1. In these cases, both text-cloze-based methods Petroni et al. (2019); Jiang et al. (2020); Dong et al. (2023) and prompt-based QG methods Wang et al. (2023) fail to evaluate factualness properly.

In this study, we present a novel knowledge probing framework named KGLens (Fig. 1), to measure the knowledge alignment between KGs and LLMs, and to identify LLM’s knowledge blind spots. To efficiently probe the LLM, KGLens features a Thompson sampling inspired method to rapidly identify knowledge blind spots. More specifically, we first introduce a parameterized knowledge graph (PKG), where each KG edge is augmented with a beta distribution, serving as an indicator of the LLM’s deficiency on that specific edge. We apply Thompson sampling to select the edges based on edge deficiency and then evaluate LLMs with the sampled edges, update the PKG with the evaluation results, and iterate this process until the running metrics converge. Our simulation experiments show that our sampling method with PKG is more efficient than random sampling and brute-force methods.

To accurately probe the LLM, KGLens features a graph-guided question generator for converting KGs into natural language with GPT-4 OpenAI (2023). We design two types of questions (fact-checking and fact-QA) to reduce the ambiguity of the expected answer, where the question type is controlled by the graph structure. We also include the entity aliases during the question generation to provide additional context and reduce the subject ambiguity. Human evaluation results show that 97.7% of our generated questions are sensible to human annotators.

To assess the whole framework of KGLens, we probed 10 popular LLMs with three domain-specific KGs collected from Wikidata, encompassing over 700 relations and 21K entities. We introduce three evaluation metrics to measure the knowledge alignment between an LLM and a KG: zero-sense rate (percentage of facts that an LLM never answered correctly in all rounds), all-sense rate (percentage of facts that an LLM always answered correctly in all rounds), and win rate (percentage of facts that an LLM answered correctly in more than half of the rounds). Human evaluation shows that KGLens can assess LLMs with a level of accuracy nearly equivalent to that of human annotators, with a 95.7% accuracy rate. Our key contributions are as follows:

1. We introduce a novel, efficient knowledge probing framework that identifies LLMs’ knowledge blind spots across diverse topics and relationships.

2. Our QG strategy leverages KG structure to reduce ambiguity, thus providing accurate and effective LLM evaluation, surpassing existing methods that focus solely on atomic triplets.

3. Human evaluation verifies our framework’s efficacy, with human-level accuracy (95.7%) in assessing LLMs’ knowledge, nearly matching human annotator performance.

4. We propose three metrics to quantify knowledge alignment between LLMs and KGs, successfully identifying knowledge gaps and reliable facts.

5. Our contributions advance the development of more reliable and factual AI systems, promoting trustworthy user experiences and efficient model improvement. We will open-source our framework and datasets.

2 Method

In probing the knowledge of LLMs, we face a unique challenge: while we have access to a vast corpus of KG edges, we lack insight into how well the LLM understands each of them. This scenario bears a striking resemblance to the multi-armed bandit problem in reinforcement learning, where we must balance exploration and exploitation to maximize our understanding of the LLM’s knowledge gaps.

Our approach, which we call KGLens, aims to efficiently identify the edges where the LLM’s knowledge is weakest or most limited. This allows us to focus our efforts on the areas where the LLM needs the most improvement. The framework of KGLens is illustrated in Fig. 1, with its workflow summarized in the caption. Key aspects of our method include Thompson Sampling Inspired Parameterized KG, Graph-Guided Question Generator, and Answer Verification.

In the following subsections, we will delve into each of these components in detail, explaining how they work together to create an effective knowledge probing system for LLMs.

2.1 Parameterized Knowledge Graph

A knowledge graph $\mathcal{G}$ is a set of triplets $\{(s_j, p_j, o_j)\}_{j=1\cdots i}$, where each tuple describes a relationship (predicate) $p_j$ between a subject $s_j$ and an object $o_j$.

Intuitively, if an LLM failed in answering a question, there is a higher chance that the LLM also lacks knowledge of the related topics. To reflect this intuition, we propose a parameterized KG (PKG), by augmenting each edge $(s_j, p_j, o_j)$ with an additional error probability $\theta_j$ reflecting the probability that an LLM may fail on this edge. We use a Beta distribution to model $\theta$:

$$\theta_j \sim Beta(\alpha_j, \beta_j), \qquad (1)$$

where $\alpha$ and $\beta$ can be interpreted as the number of times the targeted LLM failed or succeeded in answering the question. The prior of each $\theta_j$ is set to $Beta(1, 1)$.

The estimation of the posterior $\{\alpha_j, \beta_j\}_{\forall j}$ is done in an iterative manner based on the outcome from the LLM. Each iteration consists of two stages: edge sampling and parameter updating.

Edge sampling

The edge sampling process favors the edges with larger $\theta$ values. In each iteration, we sample the top-n edges ranked by $\theta$ drawn from the Beta distributions of the PKG, and then send these edges to the LLM for examination and verification. The signal regarding the correctness of the LLM’s response is collected for each edge.
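As a minimal sketch of this sampling stage (assuming the PKG is kept as a Python dict from edge to its (alpha, beta) counts; the function and variable names are illustrative, not the authors' code):

```python
import numpy as np

def sample_edges(pkg: dict, n: int, rng: np.random.Generator) -> list:
    """Thompson-sampling edge selection: draw one theta per edge from its
    Beta(alpha, beta) posterior and keep the n edges with the largest draws,
    i.e. the edges the LLM is currently believed most likely to fail on."""
    edges = list(pkg.keys())
    alphas = np.array([pkg[e][0] for e in edges], dtype=float)
    betas = np.array([pkg[e][1] for e in edges], dtype=float)
    thetas = rng.beta(alphas, betas)        # one sampled error probability per edge
    top = np.argsort(-thetas)[:n]           # rank by sampled theta, take the top-n
    return [edges[i] for i in top]

# Every edge starts from the uninformative Beta(1, 1) prior.
pkg = {("Q65", "P17", "Q30"): (1, 1), ("Q65", "P1082", "3.9M"): (1, 1)}
print(sample_edges(pkg, n=1, rng=np.random.default_rng(0)))
```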

Parameter estimation and updating

For each edge, $\alpha$ and $\beta$ are updated based on the new observation of whether the response from the LLM is correct, following the standard Beta distribution posterior updates. In addition, we also propagate the signal to the neighboring edges to account for the high correlation in error probability among connected edges. To optimize the computational process, signal propagation is restricted to one degree. Specifically,

$$\alpha_j = \alpha_j + \mathbb{I}(\text{response is incorrect}) + M_j, \qquad (2)$$

$$\beta_j = \beta_j + \mathbb{I}(\text{response is correct}) + N_j, \qquad (3)$$

where $M_j = \lvert\text{incorrect neighborhood edges}\rvert$ and $N_j = \lvert\text{correct neighborhood edges}\rvert$. An updated PKG (Fig. 2) is then obtained by repeating the edge sampling and parameter updating process iteratively until the running metrics (3.2) converge.
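One possible reading of this update stage, Eqs. (2)-(3), in code (a sketch only: the adjacency map, the batching, and the exact propagation bookkeeping are assumptions, not the authors' implementation):

```python
def update_pkg(pkg: dict, adjacency: dict, results: dict) -> None:
    """Posterior update with one-degree signal propagation.
    `results` maps each probed edge to a bool (True = LLM answered correctly);
    `adjacency` maps an edge to the edges sharing an entity with it.
    For edge j: alpha_j += own failure + M_j, beta_j += own success + N_j,
    where M_j / N_j count incorrectly / correctly answered neighbouring edges."""
    for edge, (alpha, beta) in list(pkg.items()):
        own_fail = int(edge in results and not results[edge])
        own_succ = int(edge in results and results[edge])
        neighbours = adjacency.get(edge, [])
        m = sum(1 for nb in neighbours if nb in results and not results[nb])
        n = sum(1 for nb in neighbours if nb in results and results[nb])
        pkg[edge] = (alpha + own_fail + m, beta + own_succ + n)
```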

2.2 Graph-guided Question Generation

We use GPT-4 to transform the sampled edge $K_i$ into natural questions with few-shot in-context learning. The prompts and demonstrations are shown in Appendix A.6. We design two types of questions for KGLens: Yes/No Questions (judgement) and Wh-Questions (generative), where the question type is controlled by the graph structure (out degree). In addition, to reduce the ambiguity of entities, we provide entity aliases for question generation.

2.2.1 Yes/No Questions

Each KG edge can be transformed into a question by asking whether the subject’s relation is the object. But in this way, the answer would always be Yes for all edges. To formulate hard negative examples, we build a ground truth answer set $\mathbf{T_j}$ for each $(s_j, p_j)$ and a candidate answer set $\mathbf{C_j}$ for each $p_j$. Both $\mathbf{T_j}$ and $\mathbf{C_j}$ are derived from the full Wikidata knowledge graph to ensure completeness. Then, for a tuple $(s_j, p_j, o_j)$, we use $o_j$ to constitute the Yes question, and sample a random $o_x$ from $\mathbf{C_j} - \mathbf{T_j}$ to formulate the No question. Since our QG process is on-the-fly during the evaluation, KGLens can formulate different QA pairs for the same fact. The sampling rate between Yes and No questions is evenly split, with a 50-50 distribution.
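A sketch of this negative sampling (the set names mirror the notation above; everything else is illustrative, not the authors' code):

```python
import random

def make_yes_no_probe(s, p, o, truth_sets, candidate_sets, rng=random.Random(0)):
    """Build a Yes/No probe for edge (s, p, o): half the time keep the true object
    (expected answer "Yes"), otherwise draw a hard negative o_x from C_p - T_(s,p)
    (expected answer "No"). Both sets are assumed to be built from full Wikidata."""
    if rng.random() < 0.5:
        return (s, p, o), "Yes"
    negatives = sorted(candidate_sets[p] - truth_sets[(s, p)])
    return (s, p, rng.choice(negatives)), "No"
```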

2.2.2 Wh-Questions

Another type of question queries the LLMs to generate the object(s) given the subject and the predicate, where the questions usually begin with when/where/who/what. While this question type presents greater difficulty, it is not universally applicable. Wh-questions may yield hundreds of correct objects, rendering exhaustive enumeration impractical and uninformative.

In KGLens, we opt to generate Wh-Questions only when the out degree of an entity is less than 10. Otherwise, the Yes/No Questions prompt is adopted.
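Putting the two question types together, the routing rule is purely structural; a sketch (reusing make_yes_no_probe from the previous snippet, with illustrative names):

```python
def build_probe(s, p, o, out_degree, truth_sets, candidate_sets, rng):
    """Graph-guided question-type selection: Wh-questions only when the subject's
    out degree is below 10, otherwise fall back to a Yes/No probe."""
    if out_degree < 10:
        # Wh-question: the LLM must produce the object(s) for (s, p);
        # all objects in T_(s,p) (plus aliases) count as correct.
        return {"type": "wh", "subject": s, "predicate": p,
                "answers": truth_sets[(s, p)]}
    edge, label = make_yes_no_probe(s, p, o, truth_sets, candidate_sets, rng)
    return {"type": "yes_no", "edge": edge, "answer": label}
```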

2.3 QAV: Question Answering Verification

Our QA testing involved two difficulty levels: EASY, comprising solely Yes/No questions; and HARD, with each question type generated at a 50% chance. Few-shot in-context learning was employed to test the LLMs.

To verify the response, we guide the LLMs to generate either “Yes” or “No” at the beginning of the response for Yes/No Questions and subsequently generate accompanying explanations. This approach facilitates a straightforward verification process by examining the correspondence of the initial word. For Wh-Questions, we instruct the LLM to list all the correct answers. In this case, the assessment of the answer cannot be done by string matching. Therefore, we employ a GPT-4 model to check the correctness of a response given the question, the ground truth objects and their aliases. The prompts are listed in Appendix A.6.
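A sketch of the verification logic for Yes/No questions (prefix matching only; the GPT-4 judge used for Wh-Questions is a model call and is not reproduced here):

```python
def verify_yes_no(response: str, expected: str) -> bool:
    """Check whether the LLM's reply starts with the expected "Yes" or "No";
    the prompt instructs the model to put that word first, so a prefix check suffices."""
    words = response.strip().split()
    if not words:
        return False
    first = words[0].strip(".,:;!").lower()
    return first == expected.lower()

assert verify_yes_no("Yes, Paris is the capital of France.", "Yes")
assert not verify_yes_no("No, that is incorrect.", "Yes")
```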

2.4 Evaluation Efficiency Study

In this section, we conduct a simulation experiment to evaluate the efficiency of different methods in identifying the most challenging edges in a domain-specific KG (the NBA KG in Table 2): Thompson sampling, Epsilon Greedy, Monte Carlo, and the straightforward iteration method (Brute Force), which iterates over all edges multiple times. We first estimate the ground truth $\theta$ by iterating over all edges 20 times using the Brute-Force method. The top-k most difficult edges can then be identified by ranking the estimated $\theta$. Since we are interested in the most challenging edges, the Mean Square Error (MSE) over these top-k edges is computed between the estimated $\theta$ and the ground truth $\theta$ for each method.

In Fig. 3, we plot each method’s MSE for the top-200 and top-600 most challenging edges against the number of API requests. The vertical epoch lines represent the cumulative number of API requests needed to complete a full iteration over the PKG edges using the Brute-Force method. From this figure, we observe that Thompson sampling demonstrates superiority in pinpointing the most challenging edges. Notably, both Thompson sampling and Epsilon-Greedy exhibit advantages over the Monte Carlo and Brute-Force methods by striking an effective balance between exploration and exploitation. For example, from Fig. 3, we can see that Thompson sampling with propagation achieves the accuracy of a 4.5-epoch Brute-Force run while utilizing only 65% of the compute resources. This finding underscores the potential of our approach to swiftly identify critical edges and optimize resource allocation in large-scale KG applications. In addition, we find the signal propagation mechanism further enhances these methods, enabling them to reach better accuracy more rapidly.
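The simulation metric is straightforward to reproduce; a sketch, assuming the $\theta$ estimates are stored as NumPy arrays aligned by edge index:

```python
import numpy as np

def topk_mse(theta_true: np.ndarray, theta_est: np.ndarray, k: int) -> float:
    """MSE restricted to the k edges with the largest ground-truth error probability,
    i.e. how well a sampling method has pinned down the most challenging edges."""
    topk = np.argsort(-theta_true)[:k]
    return float(np.mean((theta_est[topk] - theta_true[topk]) ** 2))
```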

3 Experiments

In this paper, we develop three domain-specific KGs using Wikidata to evaluate the knowledge accuracy and reliability of two widely used LLM APIs (GPT-3.5-turbo and GPT-4), two legacy LLMs (Babbage-002 and Davinci-002), and three variations of GPT-4 (GPT-4-1106-preview, GPT-4-turbo, GPT-4o). We also evaluate three open-source LLMs: Vicuna-33b-v1.3 Chiang et al. (2023), Xwin-LM-13B-V0.2 Team (2023), and Yi-34B-Chat (https://www.01.ai). Web browsing was unavailable for OpenAI API calls at the time the experiments were conducted.

3.1 Dataset

We prepare three test KGs with the Wikidata Query Web Service (https://query.wikidata.org) on three topics: country, NBA, and movie. The country KG includes knowledge about 16 countries. The NBA KG contains knowledge related to the 30 NBA teams. The movies are sampled from films released after 2015.

The statistics of our KGs are shown in Tab. 2. The term “dead edges” refers to edges that are less intriguing to inquire about but are still crucial for displaying entity relations. For example, certain predicates such as “member of” and “domestic relation” exemplify links between entities, but they are less captivating to inquire about and are too prevalent. Conversely, significant and meaningful edges are referred to as “active edges”, and we use them to generate questions. More details of KG construction are provided in Appendix A.2.

3.2 Metrics

To measure the alignment between KGs and LLMs, here we introduce three edge-level metrics, where each edge is evaluated multiple times with $m$ successes and $n$ failures.

Win rate. For each edge, the LLM wins if the number of successes surpasses the number of failures, namely $m > n$. The win rate signifies the portion of winning edges out of all the examined edges.

Zero-sense rate. An LLM has zero-sense about an edge (fact) if the model has never answered the edge correctly, namely $m = 0$. The zero-sense rate signifies the portion of edges with zero-sense.

All-sense rate. An LLM has all-sense about an edge (fact) if the model has never failed to answer the edge, namely $n = 0$. The all-sense rate signifies the portion of edges with all-sense.

Based on the definitions above, the win rate is the portion of edges that an LLM has a higher chance of answering correctly, indicating the reliability of the LLM. The zero-sense rate is the portion of edges that an LLM always fails to answer. The all-sense rate is the portion of edges that an LLM always succeeds in answering.
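Given per-edge success/failure tallies, the three metrics reduce to simple counts; a sketch:

```python
def alignment_metrics(counts):
    """Compute win rate, zero-sense rate, and all-sense rate from a list of
    per-edge (m, n) pairs, where m = successes and n = failures over all rounds."""
    total = len(counts)
    return {
        "win_rate": sum(m > n for m, n in counts) / total,
        "zero_sense_rate": sum(m == 0 for m, n in counts) / total,
        "all_sense_rate": sum(n == 0 for m, n in counts) / total,
    }

print(alignment_metrics([(3, 1), (0, 4), (2, 0)]))
# {'win_rate': 0.666..., 'zero_sense_rate': 0.333..., 'all_sense_rate': 0.333...}
```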

3.3 Main Results

To evaluate the KGs, we run KGLens across LLMs for 60 iterations with a batch size of 64 for each graph. Tab. 3 shows the win rate results. We put the zero-sense rate and all-sense rate results in Appendix Tab. 6 and 7, and visualize five models’ results in Fig. 4.

Across varying difficulty levels, knowledge graphs, and tested models, the GPT-4 family consistently outperforms the others in both metrics. The performance across GPT-4 variations is close: GPT-4, GPT-4o, and GPT-4-turbo exhibit the most comparable performance, followed by GPT-4-1106-preview. We find GPT-4o exhibits a more cautious disposition in disclosing personal information; it frequently refrains from providing specific details about individuals, which may be correlated with its performance disparity on the NBA KG, where GPT-4o underperforms GPT-4. Further investigation is warranted. We also find the gap between GPT-3.5-turbo and GPT-4 is relatively large across all domains and all difficulty levels, and GPT-3.5-turbo is even worse than the legacy LLMs under NBA KG EASY mode. Upon investigating the evaluation logs, GPT-3.5 exhibits a conservative approach, abstaining from generating answers when lacking confidence rather than providing speculative responses. Responses following this protocol consistently begin with phrases such as “I am sorry, but I couldn’t find any information on/about…” or “I’m sorry, but as an AI assistant, I do not have the capability to provide real-time information …”. In such cases, the edge is marked as failed when the model declines to answer a question. We also observed such behavior in Yi-34B-Chat and Vicuna-33b-v1.3.

Lastly, we find the two legacy models exhibit comparable performance across evaluations. The random-guessing baseline for the win rate is 50% for EASY evaluation and 25% for HARD evaluation. We find Babbage-002 and Davinci-002 perform only slightly better than random guessing, clearly showing the gap between the legacy LLMs and the recent LLMs. We also provide examples of different error types in Tab. 5.

3.4 Results Analysis by Edge Attributes

Another advantage of evaluating LLMs with a KG is that the results can be aggregated by different edge attributes. In this section, we show that KGLens can support two different evaluation focuses: temporal groups and entity groups.

3.4.1 Temporal Analysis of Results

We conduct temporal analysis with the movie KG. We group the results by movie release year in Fig. 5; full results are in Fig. 10 in the Appendix. From Fig. 5, we observe that both GPT-3.5 and GPT-4-1106 perform worse on questions about movies released after 2020, which is reasonable as they were mainly pretrained on data from before September 2021. On the other hand, we find that GPT-4o significantly outperforms the other models in terms of zero-sense rate and win rate. In Fig. 10, we find all models get worse when evaluated in HARD mode, but GPT-3.5 is more robust. This is because a big portion of GPT-3.5’s failures are caused by refusing to answer the questions instead of providing wrong answers, which explains its results in EASY and HARD testing. Finally, it should be noted that it is reasonable that the rankings in Fig. 10 are not strictly aligned with the years, as the temporal difference is not the only factor that affects the evaluation results.

3.4.2 Entity Group Analysis

We also show results where we group the Country KG edges by entity type in Fig. 8 in the appendix.

The proficiency levels across countries can be visualized using a color-coded table, where a darker color signifies a higher zero-sense rate and thus a lower level of proficiency. Taking GPT-4 evaluated against the country KG at the HARD difficulty level as an example, Austria, Mexico, and Italy are the best-recognized countries, ranked 1, 2, and 3 respectively. In contrast, countries such as Canada, the Philippines, and the United Kingdom are positioned at the lower end of the ranking scale.

The rationale behind the ranking can be elucidated by examining the dotted heatmap in the appendix (Fig. 6). In this figure, the size of each dot corresponds to the number of edges within the predicate sub-group, normalized by the total number of edges in the entire group. Additionally, the color of each dot serves as an indicator of the knowledge proficiency associated with the predicate sub-group for the respective country. Contrary to the table color theme, a darker color here indicates a lower zero-sense rate and thus a higher level of proficiency.

We find KGLens can identify where the errors come from for each country group. Concentrating on Austria and Canada, which represent the highest- and lowest-ranked countries respectively, it becomes evident that GPT-4 exhibits enhanced proficiency for specific predicate sub-groups. Notably, these sub-groups include “located in time zone”, “located in the administrative territorial entity”, “electrical plug type”, “emergency phone number”, and “head of state”.

3.5 Human Evaluation

We conduct a human evaluation to verify the question generation module and the question answering verification module of KGLens. A random sample of 300 instances was obtained (100 per domain, 50 per question type), and human annotations were acquired through five rounds of rating. The assessment was conducted instance by instance, where the annotators evaluated two aspects (QG and QAV): first, the clarity of the generated question’s intent, and second, the correctness of the LLM’s response in relation to the ground truth answer and its synonymous expressions. The second objective is to verify whether the annotators’ judgement agrees with KGLens’s judgement, and it is only conducted for Wh-Questions, as there is no need for humans to verify Yes/No questions. After collecting the ratings, a majority voting mechanism was employed for each instance, wherein a label was assigned as "True" if at least three annotators concurred on the evaluation criterion. The evaluation results are presented in Tab. 4: KGLens demonstrates robust performance in human evaluation across domains, with 96% accuracy in QAV and 98% accuracy in QG. We also report the overall accuracy of KGLens. For this evaluation, we define an instance as correct when two conditions are met: the generated question is marked as correct by humans, and the QA correctness judged by KGLens aligns with human judgment. Our findings show a 95.7% accuracy rate for KGLens, indicating its ability to approximate human-level performance.

4 Related Work

It’s an established fact that pre-trained models have the ability to learn and retain knowledge. For example, Petroni et al. (2019) discovered that BERT Devlin et al. (2018), even without finetuning, harbors relational knowledge comparable to traditional NLP methods. With LLMs showcasing superior in-context learning and knowledge retention, evaluating their knowledge becomes pivotal to bolster performance and mitigate hallucination.

The knowledge assessment often tests the model with specific knowledge-related datasets Lewis et al. (2021); Petroni et al. (2020); Roberts et al. (2020); Peng et al. (2023); Press et al. (2022); Mallen et al. (2023). However, given the fact that LLMs are trained on web-crawled corpora and the data is constantly evolving, it is hard to exclude the test examples from the pretraining corpus. For example, Deng et al. (2023) use fill-in probing and multi-choice probing to check the data leakage of pretrained LLMs. Their results show that GPT-3.5-turbo exhibited a noteworthy ability to guess the missing option. Another concern is that the knowledge is dynamic, and the evaluation datasets remain fixed, which makes it challenging to evaluate the LLMs’ knowledge accurately. Dhingra et al. (2022) propose a diagnostic dataset that pairs the text and timestamp together and jointly models text and time. However, their dataset is static and designed for 2010 to 2020, which is not suitable for evaluating the LLMs’ knowledge in the future. Finally, the predominant metric employed by these datasets revolves around the test set accuracy, making it challenging to identify solutions for enhancing the LLM and reducing the hallucination.

On the other hand, knowledge graphs have the advantages of customization to specific domains, evolving knowledge, and reduced potential for test set leakage. They have been employed as a structured knowledge source for LLMs Lin et al. (2019); Agarwal et al. (2020); Rosset et al. (2020) and also as a tool to probe knowledge in LLMs. LAMA Petroni et al. (2019) is the first work to probe a pretrained model with KGs, where they use the KG to generate cloze statements and evaluate the LM’s knowledge with accuracy. However, a cloze statement is not a natural question, and the correct answer is not unique in many cases, making the evaluation inaccurate. LPAQA Jiang et al. (2020) proposes to mine the relation words from the web for each subject-object pair, which is impractical for large knowledge graphs. In addition, these methods mainly focus on accuracy but neglect that LLMs may respond differently to the same fact, so reliability should also be considered. KaRR Dong et al. (2023) proposes to solve this issue by using multiple prompts for each KG edge and using the output logits of LLMs to measure knowledge reliability. However, KaRR is inefficient for large graphs, and it is not generalizable because LLMs’ output logits may be unavailable. Moreover, transforming KG triplets into questions is more natural than the text cloze task, but previous works mainly adopt the cloze task for simplicity. Finally, to the best of our knowledge, there is no existing work that visualizes an LLM’s knowledge with a KG (Fig. 2).

5 Conclusion

In this work, we introduced KGLens, a novel and efficient method tailored for visualizing and evaluating the factual knowledge embedded in LLMs. Our proposed Thompson-sampling-inspired framework with a parameterized KG offers a more efficient way to reveal an LLM’s knowledge blind spots than existing brute-force iteration methods. By evaluating various LLMs with our developed domain-specific KGs, we show KGLens provides adaptable and customizable views of an LLM’s knowledge. Human evaluation results indicate that KGLens can assess LLMs with a level of accuracy nearly equivalent to that of human annotators, achieving a 95.7% accuracy rate. Furthermore, our tool KGLens, together with our assessment KGs sourced from Wikidata, will be available to the research community, fostering collaboration and serving as a valuable resource for future investigations into language models. For businesses employing LLMs, our contributions facilitate the development of more reliable and factual AI systems, fostering trustworthy user experiences and efficient processes for improving model knowledge.

6 Limitation

KG plays a pivotal role in our approach, and its quality significantly impacts the effectiveness of this method. A high-quality KG is essential not only for the Question Generation step to generate meaningful questions but also for signal propagation. If the KG is fragmented and scattered, signal propagation then becomes less beneficial.

While our current method incorporates counting updates for alpha and beta, we acknowledge the potential for improvement. Exploring alternative methods for updating these parameters is an area of active research for us.

The signal propagation method is another direction we could dive into: instead of propagating only to neighbouring edges, should we also propagate to more distant edges? Instead of updating the neighbouring edges equally, should we decay the signal?

Question generation is currently limited to just one hop; being able to generate complicated questions that involve multiple edge hops would enable our method to evaluate the model not only on factual knowledge retrieval, but also on complex reasoning capability.

7 Ethical Considerations

We foresee no ethical issues originating from this work.

References

  • Agarwal et al. (2020) Oshin Agarwal, Heming Ge, Siamak Shakeri, and Rami Al-Rfou. 2020. Knowledge graph based synthetic corpus generation for knowledge-enhanced language model pre-training. arXiv preprint arXiv:2010.12688.
  • Augenstein et al. (2023) Isabelle Augenstein, Timothy Baldwin, Meeyoung Cha, Tanmoy Chakraborty, Giovanni Luca Ciampaglia, David Corney, Renee DiResta, Emilio Ferrara, Scott Hale, Alon Halevy, et al. 2023. Factuality challenges in the era of large language models. arXiv preprint arXiv:2310.05189.
  • Chiang et al. (2023) Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An open-source chatbot impressing GPT-4 with 90%* ChatGPT quality.
  • Deng et al. (2023) Chunyuan Deng, Yilun Zhao, Xiangru Tang, Mark Gerstein, and Arman Cohan. 2023. Benchmark probing: Investigating data leakage in large language models. In NeurIPS 2023 Workshop on Backdoors in Deep Learning - The Good, the Bad, and the Ugly.
  • Devlin et al. (2018) Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805.
  • Dhingra et al. (2022) Bhuwan Dhingra, Jeremy R. Cole, Julian Martin Eisenschlos, Daniel Gillick, Jacob Eisenstein, and William W. Cohen. 2022. Time-aware language models as temporal knowledge bases. Transactions of the Association for Computational Linguistics, 10:257–273.
  • Dong et al. (2023) Qingxiu Dong, Jingjing Xu, Lingpeng Kong, Zhifang Sui, and Lei Li. 2023. Statistical knowledge assessment for large language models. In Thirty-seventh Conference on Neural Information Processing Systems.
  • Ji et al. (2023) Ziwei Ji, Nayeon Lee, Rita Frieske, Tiezheng Yu, Dan Su, Yan Xu, Etsuko Ishii, Ye Jin Bang, Andrea Madotto, and Pascale Fung. 2023. Survey of hallucination in natural language generation. ACM Computing Surveys, 55(12):1–38.
  • Jiang et al. (2020) Zhengbao Jiang, Frank F Xu, Jun Araki, and Graham Neubig. 2020. How can we know what language models know? Transactions of the Association for Computational Linguistics, 8:423–438.
  • Lee et al. (2022) Nayeon Lee, Wei Ping, Peng Xu, Mostofa Patwary, Pascale N Fung, Mohammad Shoeybi, and Bryan Catanzaro. 2022. Factuality enhanced language models for open-ended text generation. Advances in Neural Information Processing Systems, 35:34586–34599.
  • Lewis et al. (2021) Patrick Lewis, Yuxiang Wu, Linqing Liu, Pasquale Minervini, Heinrich Küttler, Aleksandra Piktus, Pontus Stenetorp, and Sebastian Riedel. 2021. PAQ: 65 million probably-asked questions and what you can do with them.
  • Lin et al. (2019) Bill Yuchen Lin, Xinyue Chen, Jamin Chen, and Xiang Ren. 2019. KagNet: Knowledge-aware graph networks for commonsense reasoning. arXiv preprint arXiv:1909.02151.
  • Mallen et al. (2023) Alex Mallen, Akari Asai, Victor Zhong, Rajarshi Das, Daniel Khashabi, and Hannaneh Hajishirzi. 2023. When not to trust language models: Investigating effectiveness of parametric and non-parametric memories. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 9802–9822.
  • OpenAI (2023) OpenAI. 2023. GPT-4 technical report. ArXiv, abs/2303.08774.
  • Peng et al. (2023) Baolin Peng, Michel Galley, Pengcheng He, Hao Cheng, Yujia Xie, Yu Hu, Qiuyuan Huang, Lars Liden, Zhou Yu, Weizhu Chen, et al. 2023. Check your facts and try again: Improving large language models with external knowledge and automated feedback. arXiv preprint arXiv:2302.12813.
  • Perez et al. (2022) Ethan Perez, Saffron Huang, Francis Song, Trevor Cai, Roman Ring, John Aslanides, Amelia Glaese, Nat McAleese, and Geoffrey Irving. 2022. Red teaming language models with language models. arXiv preprint arXiv:2202.03286.
  • Petroni et al. (2020) Fabio Petroni, Aleksandra Piktus, Angela Fan, Patrick Lewis, Majid Yazdani, Nicola De Cao, James Thorne, Yacine Jernite, Vladimir Karpukhin, Jean Maillard, et al. 2020. KILT: A benchmark for knowledge intensive language tasks. arXiv preprint arXiv:2009.02252.
  • Petroni et al. (2019) Fabio Petroni, Tim Rocktäschel, Patrick Lewis, Anton Bakhtin, Yuxiang Wu, Alexander H Miller, and Sebastian Riedel. 2019. Language models as knowledge bases? arXiv preprint arXiv:1909.01066.
  • Press et al. (2022) Ofir Press, Muru Zhang, Sewon Min, Ludwig Schmidt, Noah A Smith, and Mike Lewis. 2022. Measuring and narrowing the compositionality gap in language models. arXiv preprint arXiv:2210.03350.
  • Roberts et al. (2020) Adam Roberts, Colin Raffel, and Noam Shazeer. 2020. How much knowledge can you pack into the parameters of a language model? In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 5418–5426, Online. Association for Computational Linguistics.
  • Rosset et al. (2020) Corby Rosset, Chenyan Xiong, Minh Phan, Xia Song, Paul Bennett, and Saurabh Tiwary. 2020. Knowledge-aware language model pretraining. arXiv preprint arXiv:2007.00655.
  • Team (2023) Xwin-LM Team. 2023. Xwin-LM.
  • Thorne et al. (2018) James Thorne, Andreas Vlachos, Christos Christodoulopoulos, and Arpit Mittal. 2018. FEVER: A large-scale dataset for fact extraction and verification. arXiv preprint arXiv:1803.05355.
  • Wang et al. (2021) Cunxiang Wang, Pai Liu, and Yue Zhang. 2021. Can generative pre-trained language models serve as knowledge bases for closed-book QA? arXiv preprint arXiv:2106.01561.
  • Wang et al. (2023) Weixuan Wang, Barry Haddow, Alexandra Birch, and Wei Peng. 2023. Assessing the reliability of large language model knowledge. arXiv preprint arXiv:2310.09820.

Appendix A

A.1 Cost Analysis

Here we highlight that the cost of GPT-4 is not counted by the number of queries but by the number of tokens. After doing a cost analysis, we conclude that GPT-4 costs only about $20 per graph, which is acceptable.

GPT-4 in KGLens is only used to construct the questions and verify the LLM's answers, and all generation is based on knowledge graph triplets. For Yes/No questions, we simply use string matching to verify the answer. Our QG prompt using GPT-4 is around 60-100 input tokens plus 10-30 output tokens, and answer verification takes about 60 input tokens plus 8 output tokens. Currently, based on the OpenAI website, gpt-4-0125-preview and gpt-4-1106-preview cost $10 per 1M input tokens and $30 per 1M output tokens. Taking an upper bound of 100 input tokens and 30 output tokens per request, each request costs less than $0.002. The final cost also depends on the size of the knowledge graph: in our case, the NBA knowledge graph has 2689 active edges, so 22.5K API requests are sufficient to iterate over the entire edge set 8 times, which translates to about $45. In practice, KGLens reaches a decent theta estimation with fewer than half that many API requests, i.e., about $20 per evaluation.
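The arithmetic behind those numbers, for reference (token counts and list prices as quoted above; treat it as an upper-bound estimate, not billing guidance):

```python
# Upper-bound tokens per request and list prices quoted in the text.
input_tokens, output_tokens = 100, 30
price_in, price_out = 10 / 1_000_000, 30 / 1_000_000   # dollars per token

per_request = input_tokens * price_in + output_tokens * price_out
print(per_request)                 # 0.0019 -> under $0.002 per request
print(22_500 * per_request)        # ~ $43, in line with the ~$45 quoted for 8 full passes
print(22_500 // 2 * per_request)   # fewer than half the requests -> roughly $20
```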

The major cost is actually the hosting cost of the target LLM. For reference, one A100 GPU costs around $5 per hour, and this cost scales up easily when evaluating larger LLMs.

In addition, we believe the answer verification step does not require GPT-4 (we chose it for simplicity); a lower-cost model could be used instead.

A.2 Knowledge Graph Building and Cleaning

Given Wikidata’s vastness and inherent noise, we implement multiple strategies to maintain focus, relevance, and precision. The following techniques allow us to delve into specialized domains and ensure a targeted and reliable exploration of the data.

A.2.1 Sampling Strategies and Preserving Data Distribution

Maintaining the original data distribution is important when cleaning a knowledge graph. To achieve this, random walks in both the forward (A.7.1) and backward (A.7.2) directions are employed. By sorting each queried edge by a random value, the sub-knowledge graph contains representative samples that mirror the diversity of the original knowledge graph, preserving the inherent distribution of entities and relationships. This approach guarantees that our cleaned knowledge graph remains a faithful representation of the underlying data, enabling us to draw accurate conclusions from our research.

The extent of the random walk is flexible and tailored to specific requirements. Within our sub-knowledge graphs, we conduct random walks spanning three steps, encompassing both nodes and edges within this range for analysis.

A.2.2 Focus and Curated Relevance

In the realm of knowledge graphs, Wikidata stands out as a repository of extensive information. However, our research necessitates a more nuanced approach. While Wikidata offers comprehensive knowledge, our focus lies in curated topics and entities tailored for specific purposes. This distinction is vital as it allows us to delve deeper into specialized domains, ensuring the precision and relevance of the data we analyze.

To address this issue, the parameterized knowledge graph begins by establishing a set of human-selected central entities, from which it initiates random walks to explore neighboring entities. Additionally, we perform predicate analysis to discern and exclude predicates of lesser importance or those that are overly common. This approach ensures the focus on pertinent data while filtering out less relevant information.

A.2.3 Filtering Less Relevant Entities

The other challenge we encounter with Wikidata pertains to the noise within its knowledge graph. This noise manifests as entities that are rarely mentioned or of lesser importance in the context of our research objectives. To maintain the integrity of our analysis, it is important to identify and filter out these less relevant entities.

Filtering by language count: entities mentioned in multiple languages are often more significant and relevant to a broader audience. By focusing on such multilingual entities, we ensure the inclusion of globally relevant information in our analysis.

Filtering by word frequency: entities that are frequently mentioned in various contexts are likely to hold greater importance. By considering word frequency, we prioritize entities that are central to discussions, thereby enhancing the relevance and significance of the data included in our analysis.

Filtering out entities with no alias: entities without aliases are less likely to be widely recognized or referenced. By excluding these entities, we focus our analysis on well-known and frequently mentioned entities, aligning our research with more meaningful and impactful data points.
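Combined, these filters can be expressed as one predicate over candidate entities; a sketch with hypothetical thresholds (the paper does not state exact cut-offs):

```python
def keep_entity(entity: dict) -> bool:
    """Keep an entity only if it passes all three relevance filters.
    The numeric thresholds here are hypothetical placeholders."""
    return (
        entity["language_count"] >= 5          # described in several Wikidata languages
        and entity["mention_frequency"] >= 10  # frequently mentioned in text
        and len(entity["aliases"]) > 0         # has at least one alias
    )
```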

A.3 Uncovered Error Types

A.4 Zero-sense rate and all-sense rate

A.5 Human Evaluation

We conduct the human evaluation with an internal paid crowdsourcing service, where 5 annotators participated in the annotation process with their consent to the use of the data. All the annotators are from English-speaking countries. The annotation instructions are shown below.

A.6 Prompt

A.7 Wikidata Web Query

A.7.1 Forward Walk

SELECT DISTINCT ?subject ?subjectLabel ?subjectDesc ?predicate ?predicateLabel ?predicateDesc ?object ?objectLabel ?objectDesc
WHERE {{
  VALUES ?subject {{
    {values}
  }}
  ?subject ?predicate ?object .
  ?subject rdfs:label ?subjectLabel .
  ?subject schema:description ?subjectDesc .
  ?property wikibase:directClaim ?predicate .
  ?property rdfs:label ?predicateLabel .
  ?property schema:description ?predicateDesc .
  ?object rdfs:label ?objectLabel .
  ?object schema:description ?objectDesc .
  FILTER(lang(?subjectLabel) = "en")
  FILTER(lang(?subjectDesc) = "en")
  FILTER(lang(?predicateLabel) = "en")
  FILTER(lang(?predicateDesc) = "en")
  FILTER(lang(?objectLabel) = "en")
  FILTER(lang(?objectDesc) = "en")
}}
ORDER BY UUID()
LIMIT {limit}

A.7.2 Backward Walk

SELECT DISTINCT ?subject ?subjectLabel ?subjectDesc ?predicate ?predicateLabel ?predicateDesc ?object ?objectLabel ?objectDesc
WHERE {{
  VALUES ?object {{
    {values}
  }}
  ?subject ?predicate ?object .
  ?subject rdfs:label ?subjectLabel .
  ?subject schema:description ?subjectDesc .
  ?property wikibase:directClaim ?predicate .
  ?property rdfs:label ?predicateLabel .
  ?property schema:description ?predicateDesc .
  ?object rdfs:label ?objectLabel .
  ?object schema:description ?objectDesc .
  FILTER(lang(?subjectLabel) = "en")
  FILTER(lang(?subjectDesc) = "en")
  FILTER(lang(?predicateLabel) = "en")
  FILTER(lang(?predicateDesc) = "en")
  FILTER(lang(?objectLabel) = "en")
  FILTER(lang(?objectDesc) = "en")
}}
ORDER BY UUID()
LIMIT {limit}
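The doubled braces suggest these queries are kept as Python format-string templates; a hedged usage sketch for running one against the public Wikidata SPARQL endpoint (the helper below is illustrative, not part of KGLens):

```python
import requests

def run_walk(template: str, entity_qids, limit: int = 100):
    """Fill the {values}/{limit} placeholders and execute the query against
    the Wikidata Query Service, returning the raw JSON result bindings."""
    values = " ".join(f"wd:{qid}" for qid in entity_qids)   # e.g. ["Q65", "Q100"]
    query = template.format(values=values, limit=limit)
    resp = requests.get(
        "https://query.wikidata.org/sparql",
        params={"query": query, "format": "json"},
        headers={"User-Agent": "kg-walk-sketch/0.1 (research use)"},
    )
    resp.raise_for_status()
    return resp.json()["results"]["bindings"]
```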

A.8 Additional Figures
