diff --git a/publications.bib b/publications.bib index eb9cb03..286ddbe 100644 --- a/publications.bib +++ b/publications.bib @@ -1,361 +1,169 @@ -@article{10.1145/2719943.2719947, -author = {Sugiyama, Kazunari and Kan, Min-Yen}, -title = {"Towards higher relevance and serendipity in scholarly paper recommendation" by Kazunari Sugiyama and Min-Yen Kan with Martin Vesely as coordinator}, -year = {2015}, -issue_date = {Winter 2015}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -volume = {2015}, -number = {Winter}, -issn = {1931-1745}, -url = {https://doi.org/10.1145/2719943.2719947}, -doi = {10.1145/2719943.2719947}, -abstract = {Finding relevant scholarly papers is an important task for researchers. Such a literature search involves identifying drawbacks in existing works and proposing new approaches that address them. However, the growing number of scientific published papers results in information overload even for simple searches, such that researchers have difficulty in finding papers relevant to their interests. Recommendation systems can help address this problem to find relevant papers efficiently. In this article, we summarize our work on scholarly paper recommendation from both relevance and serendipitous perspectives. 
Experimental results on a publicly-available scholarly paper recommendation dataset show that our proposed approaches provides promising recommendations for researchers, outperforming the state-of-the-art with statistical significance.}, -journal = {SIGWEB Newsl.}, -month = {feb}, -articleno = {4}, -numpages = {16} -} - -@inproceedings{10.1145/2911451.2911489, -author = {He, Xiangnan and Zhang, Hanwang and Kan, Min-Yen and Chua, Tat-Seng}, -title = {Fast Matrix Factorization for Online Recommendation with Implicit Feedback}, -year = {2016}, -isbn = {9781450340694}, +@inproceedings{10.1145/3132847.3132946, +author = {Halder, Kishaloy and Kan, Min-Yen and Sugiyama, Kazunari}, +title = {Health Forum Thread Recommendation Using an Interest Aware Topic Model}, +year = {2017}, +isbn = {9781450349185}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2911451.2911489}, -doi = {10.1145/2911451.2911489}, -abstract = {This paper contributes improvements on both the effectiveness and efficiency of Matrix Factorization (MF) methods for implicit feedback. We highlight two critical issues of existing works. First, due to the large space of unobserved feedback, most existing works resort to assign a uniform weight to the missing data to reduce computational complexity. However, such a uniform assumption is invalid in real-world settings. Second, most methods are also designed in an offline setting and fail to keep up with the dynamic nature of online data. We address the above two issues in learning MF models from implicit feedback. We first propose to weight the missing data based on item popularity, which is more effective and flexible than the uniform-weight assumption. However, such a non-uniform weighting poses efficiency challenge in learning the model. 
To address this, we specifically design a new learning algorithm based on the element-wise Alternating Least Squares (eALS) technique, for efficiently optimizing a MF model with variably-weighted missing data. We exploit this efficiency to then seamlessly devise an incremental update strategy that instantly refreshes a MF model given new feedback. Through comprehensive experiments on two public datasets in both offline and online protocols, we show that our implemented, open-source (https://github.com/hexiangnan/sigir16-eals) eALS consistently outperforms state-of-the-art implicit MF methods.}, -booktitle = {Proceedings of the 39th International ACM SIGIR Conference on Research and Development in Information Retrieval}, -pages = {549–558}, +url = {https://doi.org/10.1145/3132847.3132946}, +doi = {10.1145/3132847.3132946}, +abstract = {We introduce a general, interest-aware topic model (IATM), in which known higher-level interests on topics expressed by each user can be modeled. We then specialize the IATM for use in consumer health forum thread recommendation by equating each user's self-reported medical conditions as interests and topics as symptoms of treatments for recommendation. The IATM additionally models the implicit interests embodied by users' textual descriptions in their profiles. To further enhance the personalized nature of the recommendations, we introduce jointly normalized collaborative topic regression (JNCTR) which captures how users interact with the various symptoms belonging to the same clinical condition. In our experiments on two real-world consumer health forums, our proposed model significantly outperforms competitive state-of-the-art baselines by over 10\% in recall. 
Importantly, we show that our IATM+JNCTR pipeline also imbues the recommendation process with added transparency, allowing a recommendation system to justify its recommendation with respect to each user's interest in certain health conditions.}, +booktitle = {Proceedings of the 2017 ACM on Conference on Information and Knowledge Management}, +pages = {1589–1598}, numpages = {10}, -keywords = {ALS, coordinate descent, implicit feedback, item recommendation, matrix factorization, online learning}, -location = {Pisa, Italy}, -series = {SIGIR '16} +keywords = {topic models, recommender systems, graphical model, collaborative filtering}, +location = {Singapore, Singapore}, +series = {CIKM '17} } -@inproceedings{10.1145/2806416.2806504, -author = {He, Xiangnan and Chen, Tao and Kan, Min-Yen and Chen, Xiao}, -title = {TriRank: Review-aware Explainable Recommendation by Modeling Aspects}, -year = {2015}, -isbn = {9781450337946}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2806416.2806504}, -doi = {10.1145/2806416.2806504}, -abstract = {Most existing collaborative filtering techniques have focused on modeling the binary relation of users to items by extracting from user ratings. Aside from users' ratings, their affiliated reviews often provide the rationale for their ratings and identify what aspects of the item they cared most about. We explore the rich evidence source of aspects in user reviews to improve top-N recommendation. By extracting aspects (i.e., the specific properties of items) from textual reviews, we enrich the user--item binary relation to a user--item--aspect ternary relation. We model the ternary relation as a heterogeneous tripartite graph, casting the recommendation task as one of vertex ranking. We devise a generic algorithm for ranking on tripartite graphs -- TriRank -- and specialize it for personalized recommendation. 
Experiments on two public review datasets show that it consistently outperforms state-of-the-art methods. Most importantly, TriRank endows the recommender system with a higher degree of explainability and transparency by modeling aspects in reviews. It allows users to interact with the system through their aspect preferences, assisting users in making informed decisions.}, -booktitle = {Proceedings of the 24th ACM International on Conference on Information and Knowledge Management}, -pages = {1661–1670}, -numpages = {10}, -keywords = {tripartite graph ranking, top-n recommendation, reviews, explanable recommendation, comments, aspects}, +@inproceedings{10.5555/3171837.3171848, +author = {Lei, Wenqiang and Wang, Xuancong and Liu, Meichun and Ilievski, Ilija and He, Xiangnan and Kan, Min-Yen}, +title = {SWIM: a simple word interaction model for implicit discourse relation recognition}, +year = {2017}, +isbn = {9780999241103}, +publisher = {AAAI Press}, +abstract = {Capturing the semantic interaction of pairs of words across arguments and proper argument representation are both crucial issues in implicit discourse relation recognition. The current state-of-the-art represents arguments as distributional vectors that are computed via bi-directional Long Short-Term Memory networks (BiLSTMs), known to have significant model complexity. In contrast, we demonstrate that word-weighted averaging can encode argument representation which can be incorporated with word pair information efficiently. 
By saving an order of magnitude in parameters and eschewing the recurrent structure, our proposed model achieves equivalent performance, but trains seven times faster.}, +booktitle = {Proceedings of the 26th International Joint Conference on Artificial Intelligence}, +pages = {4026–4032}, +numpages = {7}, location = {Melbourne, Australia}, -series = {CIKM '15} +series = {IJCAI'17} } -@inproceedings{10.1145/1099554.1099649, -author = {Kan, Min-Yen and Thi, Hoang Oanh Nguyen}, -title = {Fast webpage classification using URL features}, -year = {2005}, -isbn = {1595931406}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1099554.1099649}, -doi = {10.1145/1099554.1099649}, -abstract = {We demonstrate the usefulness of the uniform resource locator (URL) alone in performing web page classification. This approach is faster than typical web page classification, as the pages do not have to be fetched and analyzed. Our approach segments the URL into meaningful chunks and adds component, sequential and orthographic features to model salient patterns. The resulting features are used in supervised maximum entropy modeling. We analyze our approach's effectiveness on two standardized domains. 
Our results show that in certain scenarios, URL-based methods approach the performance of current state-of-the-art full-text and link-based methods.}, -booktitle = {Proceedings of the 14th ACM International Conference on Information and Knowledge Management}, -pages = {325–326}, -numpages = {2}, -keywords = {webpage classification, uniform resource locator}, -location = {Bremen, Germany}, -series = {CIKM '05} -} - -@inproceedings{10.1145/1816123.1816129, -author = {Sugiyama, Kazunari and Kan, Min-Yen}, -title = {Scholarly paper recommendation via user's recent research interests}, -year = {2010}, -isbn = {9781450300858}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1816123.1816129}, -doi = {10.1145/1816123.1816129}, -abstract = {We examine the effect of modeling a researcher's past works in recommending scholarly papers to the researcher. Our hypothesis is that an author's published works constitute a clean signal of the latent interests of a researcher. A key part of our model is to enhance the profile derived directly from past works with information coming from the past works' referenced papers as well as papers that cite the work. In our experiments, we differentiate between junior researchers that have only published one paper and senior researchers that have multiple publications. 
We show that filtering these sources of information is advantageous -- when we additionally prune noisy citations, referenced papers and publication history, we achieve statistically significant higher levels of recommendation accuracy.}, -booktitle = {Proceedings of the 10th Annual Joint Conference on Digital Libraries}, -pages = {29–38}, -numpages = {10}, -keywords = {user modeling, recommendation, information retrieval, digital library}, -location = {Gold Coast, Queensland, Australia}, -series = {JCDL '10} -} - -@inproceedings{10.1145/3340531.3412046, -author = {Nguyen, Van-Hoang and Sugiyama, Kazunari and Nakov, Preslav and Kan, Min-Yen}, -title = {FANG: Leveraging Social Context for Fake News Detection Using Graph Representation}, -year = {2020}, -isbn = {9781450368599}, +@article{10.1002/asi.23834, +author = {An, Juyoung and Kim, Namhee and Kan, Min-Yen and Chandrasekaran, Muthu Kumar and Song, Min}, +title = {Exploring characteristics of highly cited authors according to citation location and content}, +year = {2017}, +issue_date = {August 2017}, +publisher = {John Wiley \& Sons, Inc.}, +address = {USA}, +volume = {68}, +number = {8}, +issn = {2330-1635}, +url = {https://doi.org/10.1002/asi.23834}, +doi = {10.1002/asi.23834}, +abstract = {Big Science and cross-disciplinary collaborations have reshaped the intellectual structure of research areas. A number of works have tried to uncover this hidden intellectual structure by analyzing citation contexts. However, none of them analyzed by document logical structures such as sections. The two major goals of this study are to find characteristics of authors who are highly cited section-wise and to identify the differences in section-wise author networks. This study uses 29,158 of research articles culled from the ACL Anthology, which hosts articles on computational linguistics and natural language processing. 
We find that the distribution of citations across sections is skewed and that a different set of highly cited authors share distinct academic characteristics, according to their citation locations. Furthermore, the author networks based on citation context similarity reveal that the intellectual structure of a domain differs across different sections.}, +journal = {J. Assoc. Inf. Sci. Technol.}, +month = aug, +pages = {1975–1988}, +numpages = {14} +} + +@inproceedings{10.5555/3298023.3298065, +author = {Chandrasekaran, Muthu Kumar and Epp, Carrie Demmans and Kan, Min-Yen and Litman, Diane}, +title = {Using discourse signals for robust instructor intervention prediction}, +year = {2017}, +publisher = {AAAI Press}, +abstract = {We tackle the prediction of instructor intervention in student posts from discussion forums in Massive Open Online Courses (MOOCs). Our key finding is that using automatically obtained discourse relations improves the prediction of when instructors intervene in student discussions, when compared with a state-of-the-art, feature-rich baseline. Our supervised classifier makes use of an automatic discourse parser which outputs Penn Discourse Treebank (PDTB) tags that represent in-post discourse features. We show PDTB relation-based features increase the robustness of the classifier and complement baseline features in recalling more diverse instructor intervention patterns. In comprehensive experiments over 14 MOOC offerings from several disciplines, the PDTB discourse features improve performance on average. 
The resultant models are less dependent on domain-specific vocabulary, allowing them to better generalize to new courses.}, +booktitle = {Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence}, +pages = {3415–3421}, +numpages = {7}, +location = {San Francisco, California, USA}, +series = {AAAI'17} +} + +@inproceedings{10.1145/2756406.2756946, +author = {Cuong, Nguyen Viet and Chandrasekaran, Muthu Kumar and Kan, Min-Yen and Lee, Wee Sun}, +title = {Scholarly Document Information Extraction using Extensible Features for Efficient Higher Order Semi-CRFs}, +year = {2015}, +isbn = {9781450335942}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, -url = {https://doi.org/10.1145/3340531.3412046}, -doi = {10.1145/3340531.3412046}, -abstract = {We propose Factual News Graph (FANG), a novel graphical social context representation and learning framework for fake news detection. Unlike previous contextual models that have targeted performance, our focus is on representation learning. Compared to transductive models, FANG is scalable in training as it does not have to maintain all nodes, and it is efficient at inference time, without the need to re-process the entire graph. Our experimental results show that FANG is better at capturing the social context into a high fidelity representation, compared to recent graphical and non-graphical models. In particular, FANG yields significant improvements for the task of fake news detection, and it is robust in the case of limited training data. 
We further demonstrate that the representations learned by FANG generalize to related tasks, such as predicting the factuality of reporting of a news medium.}, -booktitle = {Proceedings of the 29th ACM International Conference on Information \& Knowledge Management}, -pages = {1165–1174}, -numpages = {10}, -keywords = {social networks, representation learning, graph neural networks, fake news, disinformation}, -location = {Virtual Event, Ireland}, -series = {CIKM '20} +url = {https://doi.org/10.1145/2756406.2756946}, +doi = {10.1145/2756406.2756946}, +abstract = {We address the tasks of recovering bibliographic and document structure metadata from scholarly documents. We leverage higher order semi-Markov conditional random fields to model long-distance label sequences, improving upon the performance of the linear-chain conditional random field model. We introduce the notion of extensible features, which allows the expensive inference process to be simplified through memoization, resulting in lower computational complexity. 
Our method significantly betters the state-of-the-art on three related scholarly document extraction tasks.}, +booktitle = {Proceedings of the 15th ACM/IEEE-CS Joint Conference on Digital Libraries}, +pages = {61–64}, +numpages = {4}, +keywords = {metadata extraction, logical structure discovery, conditional random fields}, +location = {Knoxville, Tennessee, USA}, +series = {JCDL '15} } -@inproceedings{10.1145/2484028.2484035, +@inproceedings{10.1007/978-3-319-48051-0_15, author = {Lin, Jovian and Sugiyama, Kazunari and Kan, Min-Yen and Chua, Tat-Seng}, -title = {Addressing cold-start in app recommendation: latent user models constructed from twitter followers}, -year = {2013}, -isbn = {9781450320344}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2484028.2484035}, -doi = {10.1145/2484028.2484035}, -abstract = {As a tremendous number of mobile applications (apps) are readily available, users have difficulty in identifying apps that are relevant to their interests. Recommender systems that depend on previous user ratings (i.e., collaborative filtering, or CF) can address this problem for apps that have sufficient ratings from past users. But for apps that are newly released, CF does not have any user ratings to base recommendations on, which leads to the cold-start problem.In this paper, we describe a method that accounts for nascent information culled from Twitter to provide relevant recommendation in such cold-start situations. We use Twitter handles to access an app's Twitter account and extract the IDs of their Twitter-followers. We create pseudo-documents that contain the IDs of Twitter users interested in an app and then apply latent Dirichlet allocation to generate latent groups. At test time, a target user seeking recommendations is mapped to these latent groups. By using the transitive relationship of latent groups to apps, we estimate the probability of the user liking the app. 
We show that by incorporating information from Twitter, our approach overcomes the difficulty of cold-start app recommendation and significantly outperforms other state-of-the-art recommendation techniques by up to 33\%.}, -booktitle = {Proceedings of the 36th International ACM SIGIR Conference on Research and Development in Information Retrieval}, -pages = {283–292}, -numpages = {10}, -keywords = {twitter, recommender systems, mobile apps, latent user models, collaborative filtering, cold-start problem}, -location = {Dublin, Ireland}, -series = {SIGIR '13} -} - -@inproceedings{10.1145/1076034.1076103, -author = {Cui, Hang and Sun, Renxu and Li, Keya and Kan, Min-Yen and Chua, Tat-Seng}, -title = {Question answering passage retrieval using dependency relations}, -year = {2005}, -isbn = {1595930345}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1076034.1076103}, -doi = {10.1145/1076034.1076103}, -abstract = {State-of-the-art question answering (QA) systems employ term-density ranking to retrieve answer passages. Such methods often retrieve incorrect passages as relationships among question terms are not considered. Previous studies attempted to address this problem by matching dependency relations between questions and answers. They used strict matching, which fails when semantically equivalent relationships are phrased differently. We propose fuzzy relation matching based on statistical models. We present two methods for learning relation mapping scores from past QA pairs: one based on mutual information and the other on expectation maximization. Experimental results show that our method significantly outperforms state-of-the-art density-based passage retrieval methods by up to 78\% in mean reciprocal rank. 
Relation matching also brings about a 50\% improvement in a system enhanced by query expansion.}, -booktitle = {Proceedings of the 28th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval}, -pages = {400–407}, -numpages = {8}, -keywords = {dependency parsing, passage retrieval, question answering}, -location = {Salvador, Brazil}, -series = {SIGIR '05} -} - -@inproceedings{10.1145/2467696.2467701, -author = {Sugiyama, Kazunari and Kan, Min-Yen}, -title = {Exploiting potential citation papers in scholarly paper recommendation}, -year = {2013}, -isbn = {9781450320771}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2467696.2467701}, -doi = {10.1145/2467696.2467701}, -abstract = {To help generate relevant suggestions for researchers, recommendation systems have started to leverage the latent interests in the publication profiles of the researchers themselves. While using such a publication citation network has been shown to enhance performance, the network is often sparse, making recommendation difficult. To alleviate this sparsity, we identify "potential citation papers" through the use of collaborative filtering. 
Also, as different logical sections of a paper have different significance, as a secondary contribution, we investigate which sections of papers can be leveraged to represent papers effectively.On a scholarly paper recommendation dataset, we show that recommendation accuracy significantly outperforms state-of-the-art recommendation baselines as measured by nDCG and MRR, when we discover potential citation papers using imputed similarities via collaborative filtering and represent candidate papers using both the full text and assigning more weight to the conclusion sections.}, -booktitle = {Proceedings of the 13th ACM/IEEE-CS Joint Conference on Digital Libraries}, -pages = {153–162}, -numpages = {10}, -keywords = {citation analysis, collaborative filtering, digital library, information retrieval, recommendation}, -location = {Indianapolis, Indiana, USA}, -series = {JCDL '13} -} - -@inproceedings{10.1145/1141753.1141826, -author = {Tan, Yee Fan and Kan, Min Yen and Lee, Dongwon}, -title = {Search engine driven author disambiguation}, -year = {2006}, -isbn = {1595933549}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1141753.1141826}, -doi = {10.1145/1141753.1141826}, -abstract = {In scholarly digital libraries, author disambiguation is an important task that attributes a scholarly work with specific authors. This is critical when individuals share the same name. We present an approach to this task that analyzes the results of automatically-crafted web searches. A key observation is that pages from rare web sites are stronger source of evidence than pages from common web sites, which we model as Inverse Host Frequency (IHF). 
Our system is able to achieve an average accuracy of 0.836.}, -booktitle = {Proceedings of the 6th ACM/IEEE-CS Joint Conference on Digital Libraries}, -pages = {314–315}, -numpages = {2}, -keywords = {IHF, author disambiguation, entity resolution}, -location = {Chapel Hill, NC, USA}, -series = {JCDL '06} -} - -@inproceedings{10.1145/1255175.1255213, -author = {Yan, Su and Lee, Dongwon and Kan, Min-Yen and Giles, Lee C.}, -title = {Adaptive sorted neighborhood methods for efficient record linkage}, -year = {2007}, -isbn = {9781595936448}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1255175.1255213}, -doi = {10.1145/1255175.1255213}, -abstract = {Traditionally, record linkage algorithms have played an important role in maintaining digital libraries - i.e., identifying matching citations or authors for consolidation in updating or integrating digital libraries. As such, a variety of record linkage algorithms have been developed and deployed successfully. Often, however, existing solutions have a set of parameters whose values are set by human experts off-lineand are fixed during the execution. Since finding the ideal values of such parameters is not straightforward, or no such single ideal value even exists, the applicability of existing solutions to new scenarios or domains is greatly hampered. To remedy this problem, we argue that one can achieve significant improvement by adaptively and dynamically changing such parameters of record linkage algorithms. To validate our hypothesis, we take a classical record linkage algorithm, the sorted neighborhood method (SNM), and demonstrate how we can achieve improved accuracy and performance by adaptively changing its fixed sliding window size. 
Our claim is analytically and empirically validated using both real and synthetic data sets of digital libraries and other domains.}, -booktitle = {Proceedings of the 7th ACM/IEEE-CS Joint Conference on Digital Libraries}, -pages = {185–194}, -numpages = {10}, -keywords = {citation matching, entity resolution, record linkage, sorted neighborhood}, -location = {Vancouver, BC, Canada}, -series = {JCDL '07} -} - -@inproceedings{10.1145/2566486.2567975, -author = {He, Xiangnan and Kan, Min-Yen and Xie, Peichu and Chen, Xiao}, -title = {Comment-based multi-view clustering of web 2.0 items}, -year = {2014}, -isbn = {9781450327442}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2566486.2567975}, -doi = {10.1145/2566486.2567975}, -abstract = {Clustering Web 2.0 items (i.e., web resources like videos, images) into semantic groups benefits many applications, such as organizing items, generating meaningful tags and improving web search. In this paper, we systematically investigate how user-generated comments can be used to improve the clustering of Web 2.0 items. In our preliminary study of Last.fm, we find that the two data sources extracted from user comments -- the textual comments and the commenting users -- provide complementary evidence to the items' intrinsic features. These sources have varying levels of quality, but we importantly we find that incorporating all three sources improves clustering. To accommodate such quality imbalance, we invoke multi-view clustering, in which each data source represents a view, aiming to best leverage the utility of different views.To combine multiple views under a principled framework, we propose CoNMF (Co-regularized Non-negative Matrix Factorization), which extends NMF for multi-view clustering by jointly factorizing the multiple matrices through co-regularization. 
Under our CoNMF framework, we devise two paradigms -- pair-wise CoNMF and cluster-wise CoNMF -- and propose iterative algorithms for their joint factorization. Experimental results on Last.fm and Yelp datasets demonstrate the effectiveness of our solution. In Last.fm, CoNMF betters k-means with a statistically significant F1 increase of 14\%, while achieving comparable performance with the state-of-the-art multi-view clustering method CoSC (Co-regularized Spectral Clustering). On a Yelp dataset, CoNMF outperforms the best baseline CoSC with a statistically significant performance gain of 7\%.}, -booktitle = {Proceedings of the 23rd International Conference on World Wide Web}, -pages = {771–782}, -numpages = {12}, -keywords = {CoNMF, co-regularized nmf, comment-based clustering, multi-view clustering}, -location = {Seoul, Korea}, -series = {WWW '14} -} - -@inproceedings{10.1145/2600428.2609558, -author = {He, Xiangnan and Gao, Ming and Kan, Min-Yen and Liu, Yiqun and Sugiyama, Kazunari}, -title = {Predicting the popularity of web 2.0 items based on user comments}, -year = {2014}, -isbn = {9781450322577}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2600428.2609558}, -doi = {10.1145/2600428.2609558}, -abstract = {In the current Web 2.0 era, the popularity of Web resources fluctuates ephemerally, based on trends and social interest. As a result, content-based relevance signals are insufficient to meet users' constantly evolving information needs in searching for Web 2.0 items. Incorporating future popularity into ranking is one way to counter this. However, predicting popularity as a third party (as in the case of general search engines) is difficult in practice, due to their limited access to item view histories. To enable popularity prediction externally without excessive crawling, we propose an alternative solution by leveraging user comments, which are more accessible than view counts. 
Due to the sparsity of comments, traditional solutions that are solely based on view histories do not perform well. To deal with this sparsity, we mine comments to recover additional signal, such as social influence. By modeling comments as a time-aware bipartite graph, we propose a regularization-based ranking algorithm that accounts for temporal, social influence and current popularity factors to predict the future popularity of items. Experimental results on three real-world datasets --- crawled from YouTube, Flickr and Last.fm --- show that our method consistently outperforms competitive baselines in several evaluation tasks.}, -booktitle = {Proceedings of the 37th International ACM SIGIR Conference on Research \& Development in Information Retrieval}, -pages = {233–242}, -numpages = {10}, -keywords = {bipartite graph ranking, buir, comments mining, item ranking, popularity prediction}, -location = {Gold Coast, Queensland, Australia}, -series = {SIGIR '14} -} - -@inproceedings{10.1145/988672.988686, -author = {Cui, Hang and Kan, Min-Yen and Chua, Tat-Seng}, -title = {Unsupervised learning of soft patterns for generating definitions from online news}, -year = {2004}, -isbn = {158113844X}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/988672.988686}, -doi = {10.1145/988672.988686}, -abstract = {Breaking news often contains timely definitions and descriptions of current terms, organizations and personalities. We utilize such web sources to construct definitions for such terms. Previous work has identified definitions using hand-crafted rules or supervised learning that constructs rigid, hard text patterns. In contrast, we demonstrate a new approach that uses flexible, soft matching patterns to characterize definition sentences. Our soft patterns are able to effectively accommodate the diversity of definition sentence structure exhibited in news. 
We use pseudo-relevance feedback to automatically label sentences for use in soft pattern generation. The application of our unsupervised method significantly improves baseline systems on both the standardized TREC corpus as well as crawled online news articles by 27\% and 30\%, respectively, in terms of F measure. When applied to a state-of-art definition generation system recently fielded in the TREC 2003 definitional question answering task, it improves the performance by 14\%.}, -booktitle = {Proceedings of the 13th International Conference on World Wide Web}, -pages = {90–99}, -numpages = {10}, -keywords = {definition generation, definitional question answering, pseudo-relevance feedback, soft patterns, unsupervised learning}, -location = {New York, NY, USA}, -series = {WWW '04} -} - -@inproceedings{10.1145/1013367.1013426, -author = {Kan, Min-Yen}, -title = {Web page classification without the web page}, -year = {2004}, -isbn = {1581139128}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/1013367.1013426}, -doi = {10.1145/1013367.1013426}, -abstract = {Uniform resource locators (URLs), which mark the address of a resource on the World Wide Web, are often human-readable and can hint at the category of the resource. This paper explores the use of URLs for webpage categorization via a two-phase pipeline of word segmentation/expansion and classification. We quantify its performance against document-based methods, which require the retrieval of the source document.}, -booktitle = {Proceedings of the 13th International World Wide Web Conference on Alternate Track Papers \& Posters}, -pages = {262–263}, -numpages = {2}, -keywords = {abbreviation expansion, text categorization, uniform resource locator, word segmentation}, -location = {New York, NY, USA}, -series = {WWW Alt. 
'04} -} - -@inproceedings{10.1145/2964284.2964291, -author = {Chen, Tao and He, Xiangnan and Kan, Min-Yen}, -title = {Context-aware Image Tweet Modelling and Recommendation}, +title = {Scrutinizing Mobile App Recommendation: Identifying Important App-Related Indicators}, year = {2016}, -isbn = {9781450336031}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2964284.2964291}, -doi = {10.1145/2964284.2964291}, -abstract = {While efforts have been made on bridging the semantic gap in image understanding, the in situ understanding of social media images is arguably more important but has had less progress. In this work, we enrich the representation of images in image tweets by considering their social context. We argue that in the microblog context, traditional image features, e.g., low-level SIFT or high-level detected objects, are far from adequate in interpreting the necessary semantics latent in image tweets. To bridge this gap, we move from the images' pixels to their context and propose a context-aware image bf tweet modelling (CITING) framework to mine and fuse contextual text to model such social media images' semantics. We start with tweet's intrinsic contexts, namely, 1) text within the image itself and 2) its accompanying text; and then we turn to the extrinsic contexts: 3) the external web page linked to by the tweet's embedded URL, and 4) the Web as a whole. These contexts can be leveraged to benefit many fundamental applications. To demonstrate the effectiveness our framework, we focus on the task of personalized image tweet recommendation, developing a feature-aware matrix factorization framework that encodes the contexts as a part of user interest modelling. Extensive experiments on a large Twitter dataset show that our proposed method significantly improves performance. 
Finally, to spur future studies, we have released both the code of our recommendation model and our image tweet dataset.}, -booktitle = {Proceedings of the 24th ACM International Conference on Multimedia}, -pages = {1018–1027}, -numpages = {10}, -keywords = {twitter, recommendation, microblog, image tweets, image semantics, context}, -location = {Amsterdam, The Netherlands}, -series = {MM '16} -} - -@inproceedings{10.1145/2964284.2964291, -author = {Chen, Tao and He, Xiangnan and Kan, Min-Yen}, -title = {Context-aware Image Tweet Modelling and Recommendation}, +isbn = {978-3-319-48050-3}, +publisher = {Springer-Verlag}, +address = {Berlin, Heidelberg}, +url = {https://doi.org/10.1007/978-3-319-48051-0_15}, +doi = {10.1007/978-3-319-48051-0_15}, +abstract = {Among several traditional and novel mobile app recommender techniques that utilize a diverse set of app-related features (such as an app’s Twitter followers, various version instances, etc.), which app-related features are the most important indicators for app recommendation? In this paper, we develop a hybrid app recommender framework that integrates a variety of app-related features and recommendation techniques, and then identify the most important indicators for the app recommendation task. 
Our results reveal an interesting correlation with data from third-party app analytics companies; and suggest that, in the context of mobile app recommendation, more focus could be placed in user and trend analysis via social networks.}, +booktitle = {Information Retrieval Technology: 12th Asia Information Retrieval Societies Conference, AIRS 2016, Beijing, China, November 30 – December 2, 2016, Proceedings}, +pages = {197–211}, +numpages = {15}, +keywords = {Recommender systems, Mobile apps, Gradient tree boosting}, +location = {Beijing, China} +} + +@inproceedings{10.1145/2911451.2914698, +author = {Cheng, Jerome and Sugiyama, Kazunari and Kan, Min-Yen}, +title = {Linking Organizational Social Network Profiles}, year = {2016}, -isbn = {9781450336031}, -publisher = {Association for Computing Machinery}, -address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2964284.2964291}, -doi = {10.1145/2964284.2964291}, -abstract = {While efforts have been made on bridging the semantic gap in image understanding, the in situ understanding of social media images is arguably more important but has had less progress. In this work, we enrich the representation of images in image tweets by considering their social context. We argue that in the microblog context, traditional image features, e.g., low-level SIFT or high-level detected objects, are far from adequate in interpreting the necessary semantics latent in image tweets. To bridge this gap, we move from the images' pixels to their context and propose a context-aware image bf tweet modelling (CITING) framework to mine and fuse contextual text to model such social media images' semantics. We start with tweet's intrinsic contexts, namely, 1) text within the image itself and 2) its accompanying text; and then we turn to the extrinsic contexts: 3) the external web page linked to by the tweet's embedded URL, and 4) the Web as a whole. These contexts can be leveraged to benefit many fundamental applications. 
To demonstrate the effectiveness our framework, we focus on the task of personalized image tweet recommendation, developing a feature-aware matrix factorization framework that encodes the contexts as a part of user interest modelling. Extensive experiments on a large Twitter dataset show that our proposed method significantly improves performance. Finally, to spur future studies, we have released both the code of our recommendation model and our image tweet dataset.}, -booktitle = {Proceedings of the 24th ACM International Conference on Multimedia}, -pages = {1018–1027}, -numpages = {10}, -keywords = {twitter, recommendation, microblog, image tweets, image semantics, context}, -location = {Amsterdam, The Netherlands}, -series = {MM '16} -} - -@inproceedings{10.1145/2502081.2502203, -author = {Chen, Tao and Lu, Dongyuan and Kan, Min-Yen and Cui, Peng}, -title = {Understanding and classifying image tweets}, -year = {2013}, -isbn = {9781450324045}, +isbn = {9781450340694}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2502081.2502203}, -doi = {10.1145/2502081.2502203}, -abstract = {Social media platforms now allow users to share images alongside their textual posts. These image tweets make up a fast-growing percentage of tweets, but have not been studied in depth unlike their text-only counterparts. We study a large corpus of image tweets in order to uncover what people post about and the correlation between the tweet's image and its text. 
We show that an important functional distinction is between visually-relevant and visually-irrelevant tweets, and that we can successfully build an automated classifier utilizing text, image and social context features to distinguish these two classes, obtaining a macro F1 of 70.5\%.}, -booktitle = {Proceedings of the 21st ACM International Conference on Multimedia}, -pages = {781–784}, +url = {https://doi.org/10.1145/2911451.2914698}, +doi = {10.1145/2911451.2914698}, +abstract = {Many organizations possess social media accounts on different social networks, but these profiles are not always linked. End applications, users, as well as the organization themselves, can benefit when the profiles are appropriately identified and linked. Most existing works on social network entity linking focus on linking individuals, and do not model features specific for organizational linking. We address this gap not only to link official social media accounts but also to discover and solve the identification and linking of associated affiliate accounts -- such as geographical divisions and brands -- which are important to distinguish. We instantiate our method for classifying profiles on social network services for Twitter and Facebook, which major organizations use. We classify profiles as to whether they belong to an organization or its affiliates. 
Our best classifier achieves an accuracy of 0.976 on average in both datasets, significantly improving baselines that exploit the features used in state-of-the-art comparable user linkage strategies.}, +booktitle = {Proceedings of the 39th International ACM SIGIR Conference on Research and Development in Information Retrieval}, +pages = {901–904}, numpages = {4}, -keywords = {microblog, image tweets, classification, analysis}, -location = {Barcelona, Spain}, -series = {MM '13} +keywords = {organization entity profiling, organizational social profiles, record linkage, social networks}, +location = {Pisa, Italy}, +series = {SIGIR '16} } -@inproceedings{10.1145/2600428.2609560, -author = {Lin, Jovian and Sugiyama, Kazunari and Kan, Min-Yen and Chua, Tat-Seng}, -title = {New and improved: modeling versions to improve app recommendation}, -year = {2014}, -isbn = {9781450322577}, + +@inproceedings{10.1145/2808797.2808820, +author = {Lim, Bang Hui and Lu, Dongyuan and Chen, Tao and Kan, Min-Yen}, +title = {\#mytweet via Instagram: Exploring User Behaviour across Multiple Social Networks}, +year = {2015}, +isbn = {9781450338547}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, -url = {https://doi.org/10.1145/2600428.2609560}, -doi = {10.1145/2600428.2609560}, -abstract = {Existing recommender systems usually model items as static -- unchanging in attributes, description, and features. However, in domains such as mobile apps, a version update may provide substantial changes to an app as updates, reflected by an increment in its version number, may attract a consumer's interest for a previously unappealing version. Version descriptions constitute an important recommendation evidence source as well as a basis for understanding the rationale for a recommendation. We present a novel framework that incorporates features distilled from version descriptions into app recommendation. 
We use a semi-supervised topic model to construct a representation of an app's version as a set of latent topics from version metadata and textual descriptions. We then discriminate the topics based on genre information and weight them on a per-user basis to generate a version-sensitive ranked list of apps for a target user. Incorporating our version features with state-of-the-art individual and hybrid recommendation techniques significantly improves recommendation quality. An important advantage of our method is that it targets particular versions of apps, allowing previously disfavored apps to be recommended when user-relevant features are added.}, -booktitle = {Proceedings of the 37th International ACM SIGIR Conference on Research \& Development in Information Retrieval}, -pages = {647–656}, -numpages = {10}, -keywords = {version sensitive, recommender systems, mobile apps, app store}, -location = {Gold Coast, Queensland, Australia}, -series = {SIGIR '14} +url = {https://doi.org/10.1145/2808797.2808820}, +doi = {10.1145/2808797.2808820}, +abstract = {We study how users of multiple online social networks (OSNs) employ and share information by studying a common user pool that use six OSNs -- Flickr, Google+, Instagram, Tumblr, Twitter, and YouTube. We analyze the temporal and topical signature of users' sharing behaviour, showing how they exhibit distinct behaviorial patterns on different networks. 
We also examine cross-sharing (i.e., the act of user broadcasting their activity to multiple OSNs near-simultaneously), a previously-unstudied behaviour and demonstrate how certain OSNs play the roles of originating source and destination sinks.}, +booktitle = {Proceedings of the 2015 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining 2015}, +pages = {113–120}, +numpages = {8}, +keywords = {Online Social Networks, cross-sharing, user behaviour}, +location = {Paris, France}, +series = {ASONAM '15} } -@article{10.1109/TKDE.2016.2611584, -author = {He, Xiangnan and Gao, Ming and Kan, Min-Yen and Wang, Dingxian}, -title = {BiRank: Towards Ranking on Bipartite Graphs}, -year = {2017}, -issue_date = {January 2017}, -publisher = {IEEE Educational Activities Department}, -address = {USA}, -volume = {29}, -number = {1}, -issn = {1041-4347}, -url = {https://doi.org/10.1109/TKDE.2016.2611584}, -doi = {10.1109/TKDE.2016.2611584}, -abstract = {The bipartite graph is a ubiquitous data structure that can model the relationship between two entity types: for instance, users and items, queries and webpages. In this paper, we study the problem of ranking vertices of a bipartite graph, based on the graph's link structure as well as prior information about vertices (which we term a query vector ). We present a new solution, BiRank, which iteratively assigns scores to vertices and finally converges to a unique stationary ranking. In contrast to the traditional random walk-based methods, BiRank iterates towards optimizing a regularization function, which smooths the graph under the guidance of the query vector. Importantly, we establish how BiRank relates to the Bayesian methodology, enabling the future extension in a probabilistic way. To show the rationale and extendability of the ranking methodology, we further extend it to rank for the more generic $n$ -partite graphs. 
BiRank's generic modeling of both the graph structure and vertex features enables it to model various ranking hypotheses flexibly. To illustrate its functionality, we apply the BiRank and TriRank (ranking for tripartite graphs) algorithms to two real-world applications: a general ranking scenario that predicts the future popularity of items, and a personalized ranking scenario that recommends items of interest to users. Extensive experiments on both synthetic and real-world datasets demonstrate BiRank's soundness (fast convergence), efficiency (linear in the number of graph edges), and effectiveness (achieving state-of-the-art in the two real-world tasks).}, -journal = {IEEE Trans. on Knowl. and Data Eng.}, -month = {jan}, -pages = {57–71}, -numpages = {15} -} \ No newline at end of file +@article{10.1007/s00799-014-0122-2, +author = {Sugiyama, Kazunari and Kan, Min-Yen}, +title = {A comprehensive evaluation of scholarly paper recommendation using potential citation papers}, +year = {2015}, +issue_date = {June 2015}, +publisher = {Springer-Verlag}, +address = {Berlin, Heidelberg}, +volume = {16}, +number = {2}, +issn = {1432-5012}, +url = {https://doi.org/10.1007/s00799-014-0122-2}, +doi = {10.1007/s00799-014-0122-2}, +abstract = {To help generate relevant suggestions for researchers, recommendation systems have started to leverage the latent interests in the publication profiles of the researchers themselves. While using such a publication citation network has been shown to enhance performance, the network is often sparse, making recommendation difficult. To alleviate this sparsity, in our former work, we identified "potential citation papers" through the use of collaborative filtering. Also, as different logical sections of a paper have different significance, as a secondary contribution, we investigated which sections of papers can be leveraged to represent papers effectively. 
While this initial approach works well for researchers vested in a single discipline, it generates poor predictions for scientists who work on several different topics in the discipline (hereafter, "intra-disciplinary"). We thus extend our previous work in this paper by proposing an adaptive neighbor selection method to overcome this problem in our imputation-based collaborative filtering framework. On a publicly-available scholarly paper recommendation dataset, we show that recommendation accuracy significantly outperforms state-of-the-art recommendation baselines as measured by nDCG and MRR, when using our adaptive neighbor selection method. While recommendation performance is enhanced for all researchers, improvements are more marked for intra-disciplinary researchers, showing that our method does address the targeted audience.}, +journal = {Int. J. Digit. Libr.}, +month = jun, +pages = {91–109}, +numpages = {19}, +keywords = {Recommendation, Information retrieval, Digital library, Collaborative filtering, Citation analysis} +} + +@inproceedings{10.5555/2887007.2887012, +author = {Chen, Tao and SalahEldeen, Hany M. and He, Xiangnan and Kan, Min-Yen and Lu, Dongyuan}, +title = {VELDA: relating an image tweet's text and images}, +year = {2015}, +isbn = {0262511290}, +publisher = {AAAI Press}, +abstract = {Image tweets are becoming a prevalent form of social media, but little is known about their content - textual and visual - and the relationship between the two mediums. Our analysis of image tweets shows that while visual elements certainly play a large role in image-text relationships, other factors such as emotional elements, also factor into the relationship. 
We develop Visual-Emotional LDA (VELDA), a novel topic model to capture the image-text correlation from multiple perspectives (namely, visual and emotional). Experiments on real-world image tweets in both English and Chinese and other user generated content, show that VELDA significantly outperforms existing methods on cross-modality image retrieval. Even in other domains where emotion does not factor in image choice directly, our VELDA model demonstrates good generalization ability, achieving higher fidelity modeling of such multimedia documents.}, +booktitle = {Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence}, +pages = {30–36}, +numpages = {7}, +location = {Austin, Texas}, +series = {AAAI'15} +}