2018
Journal Articles
Adrian-Gabriel Chifu; Florentina Hristea
Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for IR? Journal Article
In: Open Computer Science, vol. 8, no. 1, pp. 218–227, 2018.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination
% Journal article (Open Computer Science, 2018) on feature selection for spectral clustering.
% The acronym IR in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@article{chifu2018feature,
title = {Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for {IR}?},
author = {Adrian-Gabriel Chifu and Florentina Hristea},
url = {https://www.degruyter.com/view/journals/comp/8/1/article-p218.xml},
year = {2018},
date = {2018-12-01},
urldate = {2018-12-01},
journal = {Open Computer Science},
volume = {8},
number = {1},
pages = {218--227},
publisher = {Sciendo},
abstract = {Whether or not word sense disambiguation (WSD) can improve information retrieval (IR) results represents a topic that has been intensely debated over the years, with many inconclusive or contradictory conclusions. The most rarely used type of WSD for this task is the unsupervised one, although it has been proven to be beneficial at a large scale. Our study builds on existing research and tries to improve the most recent unsupervised method which is based on spectral clustering. It investigates the possible benefits of “helping” spectral clustering through feature selection when it performs sense discrimination for IR. Results obtained so far, involving large data collections, encourage us to point out the importance of feature selection even in the case of this advanced, state of the art clustering technique that is known for performing its own feature weighting. By suggesting an improvement of what we consider the most promising approach to usage of WSD in IR, and by commenting on its possible extensions, we state that WSD still holds a promise for IR and hope to stimulate continuation of this line of research, perhaps at an even more successful level.},
keywords = {Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination},
pubstate = {published},
tppubtype = {article}
}
2015
Journal Articles
Adrian-Gabriel Chifu; Florentina Hristea; Josiane Mothe; Marius Popescu
Word sense discrimination in information retrieval: A spectral clustering-based approach Journal Article
In: Information Processing & Management, vol. 51, no. 2, pp. 16–31, 2015.
Abstract | Links | BibTeX | Tags: High Precision, Information Retrieval, Spectral Clustering, Word Sense Disambiguation, Word Sense Discrimination
% Journal article (Information Processing and Management, 2015) on word sense
% discrimination for information retrieval via spectral clustering.
% The ampersand in the journal name and the percent sign in the abstract are
% escaped because both are LaTeX special characters that break typesetting.
@article{chifu2015word,
title = {Word sense discrimination in information retrieval: A spectral clustering-based approach},
author = {Adrian-Gabriel Chifu and Florentina Hristea and Josiane Mothe and Marius Popescu},
url = {https://hal.archives-ouvertes.fr/hal-01153775/document},
year = {2015},
date = {2015-03-01},
urldate = {2015-01-01},
journal = {Information Processing \& Management},
volume = {51},
number = {2},
pages = {16--31},
publisher = {Elsevier},
abstract = {Word sense ambiguity has been identified as a cause of poor precision in information retrieval (IR) systems. Word sense disambiguation and discrimination methods have been defined to help systems choose which documents should be retrieved in relation to an ambiguous query. However, the only approaches that show a genuine benefit for word sense discrimination or disambiguation in IR are generally supervised ones. In this paper we propose a new unsupervised method that uses word sense discrimination in IR. The method we develop is based on spectral clustering and reorders an initially retrieved document list by boosting documents that are semantically similar to the target query. For several TREC ad hoc collections we show that our method is useful in the case of queries which contain ambiguous terms. We are interested in improving the level of precision after 5, 10 and 30 retrieved documents (P@5, P@10, P@30) respectively. We show that precision can be improved by 8\% above current state-of-the-art baselines. We also focus on poor performing queries.},
keywords = {High Precision, Information Retrieval, Spectral Clustering, Word Sense Disambiguation, Word Sense Discrimination},
pubstate = {published},
tppubtype = {article}
}