% This file was created with JabRef 2.2b2.
% Encoding: Cp1252
@PHDTHESIS{Uppe97a,
TITLE = "Theory and Algorithms for Hidden {M}arkov Models and Generalized Hidden {M}arkov Models",
AUTHOR = "D. R. Upper",
SCHOOL = "University of California",
ADDRESS = "Berkeley",
NOTE = "{P}ublished by University Microfilms Intl, Ann Arbor, Michigan",
YEAR = 1997}
@ARTICLE {Blac57a,
TITLE = "On the Identifiability Problem for Functions of {Markov} Chains",
AUTHOR = "D. Blackwell and L. Koopmans",
JOURNAL = "Ann. Math. Statist.",
VOLUME = 28, PAGES = 1011,
YEAR = 1957}
@STRING{AnnMathStat = {Ann. Math. Stat.}}
@STRING{AppStat = {Appl. Stat.}}
@STRING{IEEEIT = {IEEE Trans. Inform. Theory}}
@STRING{IEEENN = {IEEE Trans. Neur. Net.}}
@STRING{JourAppProb = {Jour. Appl. Prob.}}
@STRING{PNAS = {Proc. Natl. Acad. Sci.}}
@STRING{PRA = {Phys. Rev. A}}
@STRING{PRE = {Phys. Rev. E}}
@STRING{PRL = {Phys. Rev. Lett.}}
@STRING{RMP = {Rev. Mod. Phys.}}
@BOOK{Abramowitz1965,
title = {Handbook of Mathematical Functions},
publisher = {Dover},
year = {1965},
author = {Milton~Abramowitz and Irene~A.~Stegun},
address = {New York}
}
@ARTICLE{TWAnderson1957,
author = {T.~W.~Anderson and Leo~A.~Goodman},
title = {Statistical Inference About Markov Chains},
journal = AnnMathStat,
year = {1957},
volume = {28},
pages = {89 -- 110},
number = {1},
abstract = {Maximum likelihood estimates and their asymptotic distribution are
obtained for the transition probabilities in a Markov chain of arbitrary
order when there are repeated observations of the chain. Likelihood
ratio tests and $\chi^2$-tests of the form used in contingency tables
are obtained for testing the following hypotheses: (a) that the
transition probabilities of a first order chain are constant, (b)
that in case the transition probabilities are constant, they are
specified numbers, and (c) that the process is a $u$th order Markov
chain against the alternative it is $r$th but not $u$th order. In
case $u = 0$ and $r = 1$, case (c) results in tests of the null
hypothesis that observations at successive time points are statistically
independent against the alternate hypothesis that observations are
from a first order Markov chain. Tests of several other hypotheses
are also considered. The statistical analysis in the case of a single
observation of a long chain is also discussed. There is some discussion
of the relation between likelihood ratio criteria and $\chi^2$-tests
of the form used in contingency tables.},
annote = {Markov Chain},
keywords = {Markov Chain, Inference, Maximum Likelihood},
pdf = {TWAnderson1957.pdf}
}
@ARTICLE{Avery1999,
author = {Peter~J.~Avery and Daniel~A.~Henderson},
title = {Fitting Markov Chain Models to Discrete State Series Such as {DNA}
Sequences},
journal = AppStat,
year = {1999},
volume = {48},
pages = {53 -- 61},
number = {1},
abstract = {Discrete state series such as DNA sequences can often be modelled
by Markov chains. The analysis of such series is discussed in the
context of log-linear models. The data produce contingency tables
with similar margins due to the dependence of the observations.
However, despite the unusual structure of the tables, the analysis
is equivalent to that for data from multinomial sampling. The reason
why the standard number of degrees of freedom is correct is explained
by using theoretical arguments and the asymptotic distribution of
the deviance is verified empirically. Problems involved with fitting
high order Markov chain models, such as reduced power and computational
expense, are also discussed.},
annote = {Markov Chain},
keywords = {Markov Chain, Inference},
pdf = {\Avery1999.pdf}
}
@BOOK{Baldi2001,
title = {Bioinformatics: The Machine Learning Approach},
publisher = {MIT Press},
year = {2001},
author = {Pierre~Baldi and S{\o}ren~Brunak},
address = {Cambridge}
}
@ARTICLE{Berry1990,
author = {K.J. Berry and {P.W. Mielke, Jr.} and G.W. Cran},
title = {Algorithm {AS R83}: A Remark on Algorithm {AS 109} (Inverse of the
Incomplete Beta Function Ratio)},
journal = AppStat,
year = {1990},
volume = {39},
pages = {309-310},
number = {2},
owner = {admin},
timestamp = {2007.02.13}
}
@ARTICLE{Billingsley1961a,
author = {Patrick~Billingsley},
title = {Statistical Methods in Markov Chains},
journal = AnnMathStat,
year = {1961},
volume = {32},
pages = {12 -- 40},
number = {1},
abstract = {This paper is an expository survey of the mathematical aspects of
statistical inference as it applies to finite Markov chains, the
problem being to draw inferences about the transition probabilities
from one long, unbroken observation $\{x_1, x_2, \cdots, x_n\}$
on the chain. The topics covered include Whittle's formula, chi-square
and maximum-likelihood methods, estimation of parameters, and multiple
Markov chains. At the end of the paper it is briefly indicated how
these methods can be applied to a process with an arbitrary state
space or a continuous time parameter. Section 2 contains a simple
proof of Whittle's formula; Section 3 provides an elementary and
self-contained development of the limit theory required for the
application of chi-square methods to finite chains. In the remainder
of the paper, the results are accompanied by references to the literature,
rather than by complete proofs. As is usual in a review paper, the
emphasis reflects the author's interests. Other general accounts
of statistical inference on Markov processes will be found in Grenander
[53], Bartlett [9] and [10], Fortet [35], and in my monograph [18].
I would like to thank Paul Meier for a number of very helpful discussions
on the topics treated in this paper, particularly those of Section
3.},
annote = {Markov Chain},
keywords = {Markov Chain, Inference, Maximum Likelihood},
pdf = {Billingsley1961.pdf}
}
@ARTICLE{Chatfield1973,
author = {Christopher Chatfield},
title = {Statistical Inference Regarding Markov Chain Models},
journal = AppStat,
year = {1973},
volume = {22},
pages = {7-20},
number = {1},
abstract = {The paper reviews different techniques for examining sequential dependencies
in a series of observations each of which can have c possible outcomes.
The relationship between the likelihood ratio test and information
theory is described. The paper also considers how the techniques
need to be modified in situations where two successive outcomes
are always different.},
annote = {Markov Chain},
keywords = {Markov Chains, Information Theory, Inference, Likelihood Ratio, Model
Comparison},
pdf = {\Chatfield1973.pdf}
}
@BOOK{Cover1991,
title = {Elements of Information Theory},
publisher = {Wiley-Interscience},
year = {1991},
author = {Thomas~M.~Cover and Joy~A.~Thomas},
address = {New York}
}
@ARTICLE{Cran1977,
author = {G.W. Cran and K.J. Martin and G.E. Thomas},
title = {Remark {AS R19} and Algorithm {AS 109}--A Remark on Algorithms {AS
63} (The Incomplete Integral) and {AS 64} (Inverse of the Incomplete
Beta Function Ratio)},
journal = AppStat,
year = {1977},
volume = {26},
pages = {111-114},
number = {1},
owner = {admin},
timestamp = {2007.02.13}
}
@ARTICLE{Crutchfield1994,
author = {James~P.~Crutchfield},
title = {The Calculi of Emergence: Computation, Dynamics, and Induction},
journal = {Physica D},
year = {1994},
volume = {75},
pages = {11-54},
abstract = {Defining structure and detecting the emergence of complexity in nature
are inherently subjective, though essential, scientific activities.
Despite the difficulties, these problems can be analyzed in terms
of how model-building observers infer from measurements the computational
capabilities embedded in nonlinear processes. An observer's notion
of what is ordered, what is random, and what is complex in its environment
depends directly on its computational resources: the amount of raw
measurement data, of memory, and of time available for estimation
and inference. The discovery of structure in an environment depends
more critically and subtly, though, on how those resources are
organized. The descriptive power of the observer's chosen (or implicit)
computational model class, for example, can be an overwhelming determinant
in finding regularity in data. This paper presents an overview of
an inductive framework -- hierarchical epsilon machine reconstruction
-- in which the emergence of complexity is associated with the innovation
of new computational model classes. Complexity metrics for detecting
structure and quantifying emergence, along with an analysis of the
constraints on the dynamics of innovation, are outlined. Illustrative
examples are drawn from the onset of unpredictability in nonlinear
systems, finitary nondeterministic processes, and cellular automata
pattern recognition. They demonstrate how finite inference resources
drive the innovation of new structures and so lead to the emergence
of complexity.},
pdf = {\Crutchfield1994.pdf}
}
@ARTICLE{Crutchfield1997,
author = {James~P.~Crutchfield and David~P.~Feldman},
title = {Statistical Complexity of Simple One-Dimensional Spin Systems},
journal = PRE,
year = {1997},
volume = {55},
pages = {R1239 -- R1242},
number = {2},
abstract = {We present exact results for two complementary measures of spatial
structure generated by one-dimensional spin systems with finite-range
interactions. The first, excess entropy, measures the apparent spatial
memory stored in configurations. The second, statistical complexity,
measures the amount of memory needed to optimally predict the chain
of spin values in configurations. These statistics capture distinct
properties and are different from existing thermodynamic quantities.},
pdf = {\Crutchfield1997.pdf}
}
@ARTICLE{Crutchfield1983,
author = {J.~P.~Crutchfield and N.~H.~Packard},
title = {Symbolic Dynamics of Noisy Chaos},
journal = {Physica D},
year = {1983},
volume = {7D},
pages = {201 -- 223},
abstract = {One model of randomness observed in physical systems is that low-dimensional
deterministic chaotic attractors underlie the observations. A phenomenological
theory of chaotic dynamics requires an accounting of the information
flow from the observed system to the observer, the amount of information
available in observations, and just how this information affects
predictions of the system's future behavior. In an effort to develop
such a description, we discuss the information theory of highly
discretized observations of random behavior. Metric entropy and
topological entropy are well-defined invariant measures of such
an attractor's "level of chaos", and are computable using symbolic
dynamics. Real physical systems that display low dimensional dynamics
are, however, inevitably coupled to high-dimensional randomness,
e.g. thermal noise. We investigate the effects of such fluctuations
coupled to deterministic chaotic systems, in particular, the metric
entropy's response to the fluctuations. We find that the entropy
increases with a power law in the noise level, and that the convergence
of the entropy and the effect of fluctuations can be cast as a scaling
theory. We also argue that in addition to the metric entropy, there
is a second scaling invariant quantity that characterizes a deterministic
system with added fluctuations: I0, the maximum average information
obtainable about the initial condition that produces a particular
sequence of measurements (or symbols).},
annote = {SMS},
keywords = {Chaos, Symbolic Dynamics, Noise, Partitions},
pdf = {\Crutchfield1983.pdf}
}
@ARTICLE{Crutchfield1982,
author = {J. P. Crutchfield and N. H. Packard},
title = {Symbolic Dynamics of One-Dimensional Maps: Entropies, Finite Precision,
and Noise},
journal = {Int. J. Theo. Phys.},
year = {1982},
volume = {21},
pages = {433 - 466},
abstract = {In the study of nonlinear physical systems, one encounters apparently
random or chaotic behavior, although the systems may be completely
deterministic. Applying techniques from symbolic dynamics to maps
of the interval, we compute two measures of chaotic behavior commonly
employed in dynamical systems theory: the topological and metric
entropies. For the quadratic logistic equation, we find that the
metric entropy converges very slowly in comparison to maps which
are strictly hyperbolic. The effects of finite precision arithmetic
and external noise on chaotic behavior are characterized with the
symbolic dynamics entropies. Finally, we discuss the relationship
of these measures of chaos to algorithmic complexity, and use algorithmic
information theory as a framework to discuss the construction of
models for chaotic dynamics.},
annote = {SMS},
keywords = {Chaos, Symbolic Dynamics, Entropy, Stochastic}
}
@BOOK{Durbin1998,
title = {Biological Sequence Analysis},
publisher = {Cambridge University Press},
year = {1998},
author = {R.~Durbin and S.~Eddy and A.~Krogh and G.~Mitchison},
address = {Cambridge}
}
@BOOK{BLHao1998,
title = {Applied Symbolic Dynamics and Chaos},
publisher = {World Scientific},
year = {1998},
author = {Bai-Lin~Hao and Wei-Mou~Zheng}
}
@ARTICLE{Katz1981,
author = {Richard~W.~Katz},
title = {On Some Criteria for Estimating the Order of a Markov Chain},
journal = {Technometrics},
year = {1981},
volume = {23},
pages = {243 -- 249},
number = {3},
abstract = {Tong (1975) has proposed a procedure for estimating the order of a
Markov chain based on Akaike's information criterion (AIC). In this
paper, the asymptotic distribution of the AIC estimator is derived
and it is shown that the estimator is inconsistent. As an alternative
to the AIC procedure, the Bayesian information criterion (BIC) proposed
by Schwarz (1978) is shown to be consistent. These two procedures
yield different estimated orders when applied to specific samples
of meteorological observations. For parameters based on these meteorological
examples, the AIC and BIC procedures are compared by means of simulation
for finite samples. The results obtained have practical implications
concerning whether, in the routine fitting of precipitation data,
it is necessary to consider higher than first-order Markov chains.},
annote = {Markov Chain},
keywords = {Markov Chain, Information Criteria, AIC, BIC, Model Comparison},
pdf = {\Katz1981.pdf}
}
@ARTICLE{JSLiu1999,
author = {Jun~S.~Liu and Charles~E.~Lawrence},
title = {Bayesian Inference on Biopolymer Models},
journal = {Bioinformatics},
year = {1999},
volume = {15},
pages = {38 -- 52},
number = {1},
annote = {Markov Chain},
keywords = {Markov Chain, Bayes, Inference, Dirichlet, Bioinformatics, Model Comparison},
pdf = {\JSLiu1999.pdf}
}
@BOOK{MacKay2003,
title = {Information Theory, Inference, and Learning Algorithms},
publisher = {Cambridge University Press},
year = {2003},
author = {D. MacKay},
address = {Cambridge},
keywords = {Information Theory, Inference, Bayes, Type Theory, Neural Networks}
}
@ARTICLE{MacKay1994,
author = {David~J.~C.~MacKay and Linda~C.~Bauman~Peto},
title = {A Hierarchical Dirichlet Language Model},
journal = {Nat. Lang. Eng.},
year = {1994},
volume = {1},
number = {1},
abstract = {We discuss a hierarchical probabilistic model whose predictions are
similar to those of the popular language modelling procedure known
as `smoothing'. A number of interesting differences from smoothing
emerge. The insights gained from a probabilistic view of this problem
point towards new directions for language modelling. The ideas of
this paper are also applicable to other problems such as the modelling
of triphones in speech, and DNA and protein sequences in molecular
biology. The new algorithm is compared with smoothing on a two million
word corpus. The methods prove to be about equally accurate, with
the hierarchical model using fewer computational resources.},
annote = {Markov Chain, Inference},
keywords = {Inference, Markov Chain, Dirichlet, Bayesian},
pdf = {MacKay1994.pdf}
}
@ARTICLE{Majumder1973,
author = {K.L. Majumder and G.P. Bhattacharjee},
title = {Algorithm {AS 64}: Inverse of the Incomplete Beta Function Ratio},
journal = AppStat,
year = {1973},
volume = {22},
pages = {411-414},
number = {3},
owner = {admin},
timestamp = {2007.02.13}
}
@ARTICLE{Majumder1973a,
author = {K.L. Majumder and G.P. Bhattacharjee},
title = {Algorithm {AS 63}: The Incomplete Beta Integral},
journal = AppStat,
year = {1973},
volume = {22},
pages = {409-411},
number = {3},
owner = {admin},
timestamp = {2007.02.13}
}
@ARTICLE{Menendez1999,
author = {M.~L.~Men{\'e}ndez and D.~Morales and L.~Pardo and K.~Zografos},
title = {Statistical Inference for Finite Markov Chains Based on Divergences},
journal = {Stat. Prob. Lett.},
year = {1999},
volume = {41},
pages = {9 --17},
number = {1},
abstract = {We consider statistical data forming sequences of states of stationary
finite irreducible Markov chains, and draw statistical inference
about the transition matrix. The inference consists in estimation
of parameters of transition probabilities and testing simple and
composite hypotheses about them. The inference is based on statistics
which are suitable weighted sums of normed $\phi$-divergences of theoretical
row distributions, evaluated at suitable points, and observed empirical
row distributions. The asymptotic distribution of minimum $\phi$-divergence
estimators is obtained, as well as critical values of asymptotically
$\alpha$-level tests.},
annote = {Markov},
keywords = {Markov Chain, Inference, Minimum Distance, Divergence Statistics},
pdf = {\Menendez1999.pdf}
}
@ARTICLE{JRissanen1984,
author = {Rissanen, J.},
title = {Universal coding, information, prediction, and estimation},
journal = IEEEIT,
year = {1984},
volume = {30},
pages = {629--636},
number = {4},
abstract = {A connection between universal codes and the problems of prediction
and statistical estimation is established. A known lower bound for
the mean length of universal codes is sharpened and generalized,
and optimum universal codes constructed. The bound is defined to
give the information in strings relative to the considered class
of processes. The earlier derived minimum description length criterion
for estimation of parameters, including their number, is given a
fundamental information-theoretic justification by showing that
its estimators achieve the information in the strings. It is also
shown that one cannot do prediction in Gaussian autoregressive moving
average (ARMA) processes below a bound, which is determined by the
information in the data.},
issn = {0018-9448},
keywords = {Information theory, Parameter estimation, Prediction methods, Source
coding, MDL},
owner = {admin},
pdf = {\JRissanen1984.pdf},
timestamp = {2007.02.27}
}
@ARTICLE{Samengo2002,
author = {In{\'e}s Samengo},
title = {Estimating Probabilities from Experimental Frequencies},
journal = PRE,
year = {2002},
volume = {65},
pages = {046124},
abstract = {Estimating the probability distribution q governing the behavior of
a certain variable by sampling its value a finite number of times
most typically involves an error. Successive measurements allow
the construction of a histogram, or frequency count f, of each of
the possible outcomes. In this work, the probability that the true
distribution be q, given that the frequency count f was sampled,
is studied. Such a probability may be written as a Gibbs distribution.
A thermodynamic potential, which allows an easy evaluation of the
mean Kullback-Leibler divergence between the true and measured distribution,
is defined. For a large number of samples, the expectation value
of any function of q is expanded in powers of the inverse number
of samples. As an example, the moments, the entropy, and the mutual
information are analyzed.},
annote = {Entropy},
keywords = {Time Series, Bayesian, Entropy, Kolmogorov, Estimation},
pdf = {\Samengo2002.pdf}
}
@ARTICLE{HTong1975,
author = {H.~Tong},
title = {Determination of the Order of a Markov Chain by Akaike's Information
Criterion},
journal = JourAppProb,
year = {1975},
volume = {12},
pages = {488 -- 497},
number = {3},
abstract = {Using Akaike's information criterion, we have presented an objective
procedure for the determination of the order of an ergodic Markov
chain with a finite number of states. The procedure exploits the
asymptotic properties of the maximum likelihood ratio statistics
and Kullback and Leibler's mean information for the discrimination
between two distributions. Numerical illustrations are given, using
data from Bartlett (1966), Good and Gover (1967) and some weather
records.},
annote = {Markov Chain},
keywords = {Markov Chain, Inference, Model Comparison, Information Criteria, AIC},
pdf = {Tong1975.pdf}
}
@ARTICLE{VVapnik1999,
author = {Vapnik, V.N.},
title = {An overview of statistical learning theory},
journal = IEEENN,
year = {1999},
volume = {10},
pages = {988--999},
number = {5},
abstract = {Statistical learning theory was introduced in the late 1960's. Until
the 1990's it was a purely theoretical analysis of the problem of
function estimation from a given collection of data. In the middle
of the 1990's new types of learning algorithms (called support vector
machines) based on the developed theory were proposed. This made
statistical learning theory not only a tool for the theoretical
analysis but also a tool for creating practical algorithms for estimating
multidimensional functions. This article presents a very general
overview of statistical learning theory including both theoretical
and algorithmic aspects of the theory. The goal of this overview
is to demonstrate how the abstract learning theory established conditions
for generalization which are more general than those discussed in
classical statistical paradigms and how the understanding of these
conditions inspired new algorithmic approaches to function estimation
problems},
issn = {1045-9227},
keywords = {estimation theory, generalisation (artificial intelligence), learning
(artificial intelligence), statistical analysis, function estimation,
generalization conditions, multidimensional function estimation,
statistical learning theory, support vector machines},
owner = {admin},
pdf = {\VVapnik1999.pdf},
timestamp = {2007.02.27}
}
@ARTICLE{Vitanyi2000,
author = {Paul M.B. Vit{\'a}nyi and Ming Li},
title = {Minimum Description Length Induction, Bayesianism, and Kolmogorov
Complexity},
journal = IEEEIT,
year = {2000},
volume = {46},
number = {2},
pages = {446},
abstract = {The relationship between the Bayesian approach and the minimum description
length approach is established. We sharpen and clarify the general
modeling principles minimum description length (MDL) and minimum
message length (MML), abstracted as the ideal MDL principle and
defined from Bayes's rule by means of Kolmogorov complexity. The
basic condition under which the ideal principle should be applied
is encapsulated as the fundamental inequality, which in broad terms
states that the principle is valid when the data are random, relative
to every contemplated hypothesis and also these hypotheses are random
relative to the (universal) prior. The ideal principle states that
the prior probability associated with the hypothesis should be given
by the algorithmic universal probability, and the sum of the log
universal probability of the model plus the log of the probability
of the data given the model should be minimized. If we restrict
the model class to finite sets then application of the ideal principle
turns into Kolmogorov's minimal sufficient statistic. In general,
we show that data compression is almost always the best strategy,
both in model selection and prediction.},
keywords = {Bayes's Rule, Data Compression, Kolmogorov Complexity, MDL, MML, Model
Selection, Prediction, Randomness Test, Universal Distribution},
pdf = {\Vitanyi2000.pdf}
}
@BOOK{Wilks1962,
title = {Mathematical Statistics},
publisher = {John Wiley \& Sons, Inc.},
year = {1962},
author = {Samuel~S.~Wilks},
address = {New York}
}
@ARTICLE{Wolpert1995,
author = {David~H.~Wolpert and David~R.~Wolf},
title = {Estimating Functions of Probability Distributions from a Finite Set
of Samples},
journal = PRE,
year = {1995},
volume = {52},
pages = {6841},
abstract = {This paper addresses the problem of estimating a function of a probability
distribution from a finite set of samples of that distribution.
A Bayesian analysis of this problem is presented, the optimal properties
of the Bayes estimators are discussed, and as an example of the
formalism, closed form expressions for the Bayes estimators for
the moments of the Shannon entropy function are derived. Then numerical
results are presented that compare the Bayes estimator to the frequency-counts
estimator for the Shannon entropy. We also present the closed form
estimators, all derived elsewhere, for the mutual information, Chi-square
covariance, and some other statistics.},
annote = {Markov Chain},
keywords = {Inference, Bayesian, Multinomial, Entropy, Dirichlet},
pdf = {\Wolpert1995.pdf}
}
@ARTICLE{Young1994,
author = {Karl~Young and James~P.~Crutchfield},
title = {Fluctuation Spectroscopy},
journal = {Chaos, Solitons, and Fractals},
year = {1994},
volume = {4},
pages = {5 -- 39},
number = {1},
abstract = {We review the thermodynamics of estimating the statistical fluctuations
of an observed process. Since any statistical analysis involves
a choice of model class--either explicitly or implicitly --we demonstrate
the benefits of a careful choice. For each of three classes a particular
model is reconstructed from data streams generated by four sample
processes. Then each estimated model's thermodynamic structure is
used to estimate the typical behavior and the magnitude of deviations
for the observed system. These are then compared to the known fluctuation
properties. The type of analysis advocated here, which uses estimated
model class information, recovers the correct statistical structure
of these processes from simulated data. The current alternative--direct
estimation of the Renyi entropy from time series histograms--uses
neither prior nor reconstructed knowledge of the model class. And,
in most cases, it fails to recover the process's statistical structure
from finite data--unpredictability is overestimated. In this analysis,
we introduce the fluctuation complexity as a measure of a process's
total range of allowed statistical variation. It is a new and complementary
characteristic in that it differs from the process's information
production rate and its memory capacity.},
annote = {CMG},
keywords = {Chaos, Inference, Type Theory, Thermodynamics},
pdf = {\Young1994.pdf}
}
@comment{jabref-meta: psDirectory:C:\\MyReferences;}
@comment{jabref-meta: selector_abstract:Coupled Map;}
@comment{jabref-meta: selector_keywords:}
@comment{jabref-meta: selector_title:}
@comment{jabref-meta: pdfDirectory:C:\\MyReferences;}
@comment{jabref-meta: groupsversion:3;}
@comment{jabref-meta: groupstree:
0 AllEntriesGroup:;
1 KeywordGroup:Markov Chains\;0\;keywords\;Markov Chain\;0\;0\;;
1 KeywordGroup:Single Molecule\;0\;keywords\;Single Molecule\;0\;0\;;
1 ExplicitGroup:BIKOMC Paper\;0\;Abramowitz1965\;Avery1999\;BLHao1998\
;Baldi2001\;Berry1990\;Billingsley1961a\;Chatfield1973\;Cover1991\;Cr
an1977\;Crutchfield1982\;Crutchfield1983\;Crutchfield1994\;Crutchfiel
d1997\;Durbin1998\;HTong1975\;JRissanen1984\;JSLiu1999\;Katz1981\;Mac
Kay1994\;MacKay2003\;Majumder1973\;Majumder1973a\;Menendez1999\;Samen
go2002\;TWAnderson1957\;VVapnik1999\;Vitanyi2000\;Wilks1962\;Wolpert1
995\;Young1994\;;
1 KeywordGroup:Coupled Maps\;0\;keywords\;Coupled Map\;0\;0\;;
1 KeywordGroup:Ofria\;0\;author\;Ofria\;0\;0\;;
1 ExplicitGroup:dissertation\;0\;Abarbanel1993\;Abramowitz1965\;Avery1
999\;BLHao1998\;Baldi2001\;Berry1990\;Billingsley1961a\;Bjornstad2001
\;Breeden1990\;Chatfield1973\;Cover1991\;Cran1977\;Crutchfield1982\;C
rutchfield1983\;Crutchfield1994\;Crutchfield1997\;Cvitanovic1988\;Dur
bin1998\;Farmer1987\;Fox1995\;Gershenfeld1999\;Grossmann1977\;HTong19
75\;Harlim2005\;Hively2004\;JSLiu1999\;Katz1981\;Kolmogorov1958\;Kolm
ogorov1959\;Lasota1994\;Lehnertz1998\;MacKay1994\;MacKay2003\;Majumde
r1973\;Majumder1973a\;May1976\;McSharry1999\;Menendez1999\;Packard198
0\;Piesin1977\;Pisarenko2004\;Rossler1976\;Samengo2002\;Shaw1981\;Smi
th2002\;TWAnderson1957\;Wilks1962\;Wolpert1995\;;
1 KeywordGroup:MDL\;0\;keywords\;MDL\;0\;0\;;
}