1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
//! Entropy prediction tables.

use entropy::probabilities::{InstancesToProbabilities, SymbolIndex, SymbolInfo};
pub use io::statistics::Instances;

use binjs_shared::{IOPath, IOPathItem};

use std;
use std::borrow::Borrow;
use std::cell::RefCell;
use std::collections::HashMap;
use std::hash::Hash;
use std::rc::Rc;

#[allow(unused_imports)] // We keep enabling/disabling this.
use itertools::Itertools;
use range_encoding;

/// A newtype for `usize` used to represent an index in a dictionary of values.
#[derive(
    Add,
    Constructor,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Clone,
    Copy,
    From,
    Into,
    Debug,
    Hash,
    Serialize,
    Deserialize,
)]
struct DictionaryIndex(usize);

/// A newtype for `usize` used to represent a reference to a value already encountered.
///
/// By convention, `0` is the latest value, `1` the value before, etc.
#[derive(
    Constructor,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Clone,
    Copy,
    Hash,
    Into,
    Debug,
    Serialize,
    Deserialize,
)]
struct BackReference(usize);

mod context_information {
    use super::Instances;
    use entropy::probabilities::{SymbolIndex, SymbolInfo};

    use std::cell::RefCell;
    use std::collections::HashMap;
    use std::hash::Hash;
    use std::rc::Rc;

    use itertools::Itertools;

    /// A container for the statistics available in a given prediction context
    /// (a typical prediction context is a path in the AST, or a position in
    /// the file, etc.).
    ///
    /// This container is meant to be used in two settings:
    ///
    /// - to count the number of instances of values in a context (instantiated with `Statistics=Instances`); or
    /// - once number of instances have been converted to frequency information and unique indices
    ///     (instantiated with `Statistics=SymbolInfo`, in which case the `SymbolInfo` contains the unique index).
    ///
    /// For this reason, all the meaningful methods of this struct are implemented only if `Statistics=Instances`
    /// or `Statistics=SymbolInfo`.
    #[derive(Clone, Debug, Deserialize, Serialize)]
    pub struct ContextInformation<NodeValue, Statistics>
    where
        NodeValue: Eq + Hash,
    {
        /// NodeValue => Statistics mapping, always valid
        stats_by_node_value: HashMap<NodeValue, Statistics>,

        /// SymbolIndex => NodeValue mapping.
        ///
        /// This vector is populated only when `Statistics = SymbolInfo`. When that is the case,
        /// `value_by_symbol_index` is effectively the reverse mapping from `stats_by_node_value` (using the
        /// index embedded in `SymbolInfo`).
        value_by_symbol_index: Vec<NodeValue>,
    }
    impl<NodeValue, Statistics> ContextInformation<NodeValue, Statistics>
    where
        NodeValue: Eq + Hash,
    {
        pub fn new() -> Self {
            ContextInformation {
                stats_by_node_value: HashMap::new(),
                value_by_symbol_index: Vec::new(),
            }
        }

        /// Return the number of entries.
        pub fn len(&self) -> usize {
            self.stats_by_node_value.len()
        }

        pub fn into_iter(self) -> impl Iterator<Item = (NodeValue, Statistics)> {
            self.stats_by_node_value.into_iter()
        }

        pub fn iter(&self) -> impl Iterator<Item = (&NodeValue, &Statistics)> {
            self.stats_by_node_value.iter()
        }
    }

    // Methods that make sense only when we have finished computing frequency information.
    impl<NodeValue> ContextInformation<NodeValue, SymbolInfo>
    where
        NodeValue: Eq + Hash,
    {
        pub fn stats_by_node_value(&self) -> &HashMap<NodeValue, SymbolInfo> {
            &self.stats_by_node_value
        }

        pub fn stats_by_node_value_mut(&mut self) -> &mut HashMap<NodeValue, SymbolInfo> {
            &mut self.stats_by_node_value
        }

        pub fn value_by_symbol_index(&self, index: SymbolIndex) -> Option<&NodeValue> {
            self.value_by_symbol_index.get(Into::<usize>::into(index))
        }
    }

    // Methods that make sense only while we are collecting instances.

    impl<NodeValue> ContextInformation<NodeValue, Instances>
    where
        NodeValue: Eq + Hash,
    {
        /// Register a value as being used in this context.
        pub fn add(&mut self, node_value: NodeValue) {
            self.stats_by_node_value
                .entry(node_value)
                .and_modify(|instances| *instances += 1.into())
                .or_insert(1.into());
        }

        /// Register a value as being used in this context.
        ///
        /// If the value is already used, do not increase the number of instances.
        pub fn add_if_absent(&mut self, node_value: NodeValue) {
            self.stats_by_node_value
                .entry(node_value)
                .or_insert(1.into());
        }
    }

    impl<NodeValue> ::entropy::probabilities::InstancesToProbabilities
        for ContextInformation<NodeValue, Instances>
    where
        NodeValue: Clone + Eq + Hash + Ord,
    {
        type AsProbabilities = ContextInformation<NodeValue, SymbolInfo>;
        fn instances_to_probabilities(
            &self,
            _description: &str,
        ) -> ContextInformation<NodeValue, SymbolInfo> {
            let stats_by_node_value = self
                .stats_by_node_value
                .iter()
                .sorted_by(|(value_1, _), (value_2, _)| Ord::cmp(value_1, value_2)) // We need to ensure that the order remains stable across process restarts.
                .collect::<Vec<_>>();

            let instances = stats_by_node_value
                .iter()
                .map(|(_, instances)| Into::<usize>::into(**instances) as u32)
                .collect();

            let distribution = std::rc::Rc::new(std::cell::RefCell::new(
                range_encoding::CumulativeDistributionFrequency::new(instances),
            ));

            let (stats_by_node_value, value_by_symbol_index): (HashMap<_, _>, Vec<_>) =
                stats_by_node_value
                    .into_iter()
                    .enumerate()
                    .map(|(index, (value, _))| {
                        let for_stats_by_node_value = (
                            value.clone(),
                            SymbolInfo {
                                index: index.into(),
                                distribution: distribution.clone(),
                            },
                        );
                        let for_value_by_symbol_index = value.clone();
                        (for_stats_by_node_value, for_value_by_symbol_index)
                    })
                    .unzip();
            ContextInformation {
                stats_by_node_value,
                value_by_symbol_index,
            }
        }
    }

    impl<NodeValue> ContextInformation<NodeValue, SymbolInfo>
    where
        NodeValue: Clone + Eq + Hash,
    {
        pub fn frequencies(
            &self,
        ) -> Option<&Rc<RefCell<range_encoding::CumulativeDistributionFrequency>>> {
            self.stats_by_node_value()
                .values()
                .next()
                .map(|any| &any.distribution)
        }
    }
}
use self::context_information::ContextInformation;

/// A generic mechanism used to predict possible values in a given context (e.g.
/// AST path or file position) in a file.
///
/// This mechanism is meant to be used as follows:
///
/// 1. Create a new `ContextPredict<Context, NodeValue, Instances>` and use `ContextPredict::add` to count the number
///     of instances of each possible `NodeValue` in each possible `Context`. Alternatively, load this data
///     from an existing dictionary.
/// 2. Convert the `ContextPredict<Context, NodeValue, Instances>` into a `ContextPredict<Context, NodeValue, SymbolInfo>`
///     by calling `ContextPredict::instances_to_probabilities`.
/// 3. Use method `ContextPredict::<_, _, SymbolInfo>::stats_by_node_value` and `stats_by_node_value_mut` to get the statistics
///     information in a specific context for a specific node value (used for compression). This information contains
///     an index designed to be written to a compressed stream.
///
/// As most methods of this struct can only be used if `Statistics = Instances` xor `Statistics = SymbolInfo`,
/// the implementation of these methods is only available respectively if `Statistics = Instances` or if
/// `Statistics = SymbolInfo`.
///
/// For most use cases, you probably want one of the more specialized predictors.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct ContextPredict<Context, NodeValue, Statistics>
where
    Context: Eq + Hash + Clone,
    NodeValue: Eq + Hash + Clone,
{
    by_context: HashMap<Context, ContextInformation<NodeValue, Statistics>>,
}
impl<Context, NodeValue, Statistics> ContextPredict<Context, NodeValue, Statistics>
where
    Context: Eq + Hash + Clone,
    NodeValue: Eq + Hash + Clone,
{
    pub fn new() -> Self {
        Self {
            by_context: HashMap::new(),
        }
    }

    /// All the contexts known to this predictor.
    ///
    /// Used mainly for debugging.
    pub fn contexts(&self) -> impl Iterator<Item = &Context> {
        self.by_context.keys()
    }

    /// Iterate through this predictor.
    pub fn iter_mut(
        &mut self,
    ) -> impl Iterator<Item = (&Context, &mut ContextInformation<NodeValue, Statistics>)> {
        self.by_context.iter_mut()
    }

    pub fn iter(
        &self,
    ) -> impl Iterator<Item = (&Context, &ContextInformation<NodeValue, Statistics>)> {
        self.by_context.iter()
    }

    /// Convert this predictor into an interator.
    pub fn into_iter(
        self,
    ) -> impl Iterator<Item = (Context, ContextInformation<NodeValue, Statistics>)> {
        self.by_context.into_iter()
    }

    /// The number of states in this predictor.
    pub fn len(&self) -> usize {
        self.by_context.values().map(ContextInformation::len).sum()
    }

    /// Iter through the information available in a given context.
    pub fn iter_at<C: ?Sized>(&self, context: &C) -> impl Iterator<Item = (&NodeValue, &Statistics)>
    where
        Context: std::borrow::Borrow<C>,
        C: Hash + Eq,
    {
        std::iter::Iterator::flatten(
            self.by_context
                .get(context)
                .into_iter()
                .map(|info| info.iter()),
        )
    }
}

impl<Context, NodeValue> ContextPredict<Context, NodeValue, Instances>
where
    Context: Eq + Hash + Clone,
    NodeValue: Eq + Hash + Clone,
{
    /// Register a value as being used in this context.
    pub fn add(&mut self, context: Context, value: NodeValue) {
        let stats_by_node_value = self
            .by_context
            .entry(context)
            .or_insert_with(|| ContextInformation::new());
        stats_by_node_value.add(value)
    }

    /// Register a value as being used in this context.
    ///
    /// If the value is already used, do not increase the number of instances.
    pub fn add_if_absent(&mut self, context: Context, value: NodeValue) {
        let stats_by_node_value = self
            .by_context
            .entry(context)
            .or_insert_with(|| ContextInformation::new());
        stats_by_node_value.add_if_absent(value)
    }
}

impl<Context, NodeValue> ContextPredict<Context, NodeValue, SymbolInfo>
where
    Context: Eq + Hash + Clone,
    NodeValue: Eq + Hash + Clone,
{
    /// Get a value by context and index.
    ///
    /// `candidates` is a list of contexts which may be known to the
    /// predictor. This method looks for the first context of `candidates`
    /// known to the predictor, and uses the index => value mapping for
    /// that context.
    ///
    /// This is typically used to provide fallback dictionaries.
    ///
    /// This method is only implemented when `Statistics=SymbolInfo` as the index is initialized
    /// by `instances_to_probabilities`. The index corresponds to the one defined in
    /// the `SymbolInfo`.
    pub fn value_by_symbol_index<C2: ?Sized>(
        &self,
        candidates: &[&C2],
        index: SymbolIndex,
    ) -> Option<&NodeValue>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        for context in candidates {
            if let Some(table) = self.by_context.get(context) {
                return table.value_by_symbol_index(index);
            }
        }
        None
    }

    /// Get the frequency information in one of several contexts.
    ///
    /// `candidates` is a list of contexts which may be known to the
    /// predictor. This method looks for the first context of `candidates`
    /// known to the predictor, and gets the information that context.
    ///
    /// This is typically used to provide fallback dictionaries.
    pub fn frequencies_at<C2: ?Sized>(
        &self,
        candidates: &[&C2],
    ) -> Option<&Rc<RefCell<range_encoding::CumulativeDistributionFrequency>>>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        let table = self.context_info_at(candidates)?;
        table.frequencies()
    }

    /// Get the stats for a specific value in one of several contexts.
    ///
    /// `candidates` is a list of contexts which may be known to the
    /// predictor. This method looks for the first context of `candidates`
    /// known to the predictor, and gets the stats for the value in
    /// that context.
    ///
    /// This is typically used to provide fallback dictionaries.
    pub fn stats_by_node_value<C2: ?Sized>(
        &self,
        candidates: &[&C2],
        value: &NodeValue,
    ) -> Option<&SymbolInfo>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        let context_info = self.context_info_at(candidates)?;
        context_info.stats_by_node_value().get(value)
    }
    pub fn stats_by_node_value_mut<C2: ?Sized>(
        &mut self,
        candidates: &[&C2],
        value: &NodeValue,
    ) -> Option<&mut SymbolInfo>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        let context_info = self.context_info_at_mut(candidates)?;
        context_info.stats_by_node_value_mut().get_mut(value)
    }

    fn context_info_at<C2: ?Sized>(
        &self,
        candidates: &[&C2],
    ) -> Option<&ContextInformation<NodeValue, SymbolInfo>>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        // This is an ugly workaround, as we cannot call `get_mut`
        // from the loop.
        let mut found = None;
        for context in candidates {
            if self.by_context.get(context).is_some() {
                found = Some(context);
                break;
            }
        }
        match found {
            None => return None,
            Some(context) => return self.by_context.get(context),
        }
    }

    fn context_info_at_mut<C2: ?Sized>(
        &mut self,
        candidates: &[&C2],
    ) -> Option<&mut ContextInformation<NodeValue, SymbolInfo>>
    where
        Context: std::borrow::Borrow<C2>,
        C2: Hash + Eq,
    {
        // This is an ugly workaround, as we cannot call `get_mut`
        // from the loop.
        let mut found = None;
        for context in candidates {
            if self.by_context.get(context).is_some() {
                found = Some(context);
                break;
            }
        }
        match found {
            None => return None,
            Some(context) => return self.by_context.get_mut(context),
        }
    }
}

impl<Context, NodeValue> InstancesToProbabilities for ContextPredict<Context, NodeValue, Instances>
where
    Context: Eq + Hash + Clone + std::fmt::Debug,
    NodeValue: Eq + Hash + Clone + Ord,
{
    type AsProbabilities = ContextPredict<Context, NodeValue, SymbolInfo>;
    fn instances_to_probabilities(
        &self,
        description: &str,
    ) -> ContextPredict<Context, NodeValue, SymbolInfo> {
        debug!(target: "entropy", "Converting ContextPredict {} to probabilities", description);
        let by_context = self
            .by_context
            .iter()
            .map(|(context, info)| {
                (
                    context.clone(),
                    info.instances_to_probabilities("ContextInformation"),
                )
            })
            .collect();
        ContextPredict { by_context }
    }
}

/// A specialized predictor used to predict possible values at a possible path in the AST.
///
/// This mechanism is meant to be used as follows:
///
/// 1. Create a new `PathPredict<NodeValue, Instances>` and use `PathPredict::add` to count the number
///     of instances of each possible `NodeValue` in each possible path. Alternatively, load this data
///     from an existing dictionary.
/// 2. Convert the `PathPredict<NodeValue, Instances>` into a `PathPredict<NodeValue, SymbolInfo>`
///     by calling `PathPredict::<_, Instances>::instances_to_probabilities`.
/// 3. Use method `PathPredict::<_, SymbolInfo>::stats_by_node_value` and `stats_by_node_value_mut` to get the statistics
///     information in a specific path for a specific node value (used for compression). This information contains
///     an index designed to be written to a compressed stream.
/// 4. Use method `PathPredict::<_, SymbolInfo>::frequencies_at` to get the statistics information in a
///     specific path for all node values that have shown up in this path (used for decompression). This
///     information is used to extract an index from a compressed stream.
/// 5. Use method `PathPredict::<_, SymbolInfo>::index_at` to get a specific node value in a specific
///     path from the index extracted from the compressed stream.
///
/// As most methods of this struct can only be used if `Statistics = Instances` xor `Statistics = SymbolInfo`,
/// the implementation of these methods is only available respectively if `Statistics = Instances` or if
/// `Statistics = SymbolInfo`.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct PathPredict<NodeValue, Statistics>
where
    NodeValue: Eq + Hash + Clone,
{
    /// The amount of context to use.
    ///
    /// With a depth of 0, paths are ignored. With a depth of 1, we only take into account
    /// the node/field. With a depth of 2, we also take into account the node/field of the
    /// grand parent, etc.
    depth: usize,

    /// Actual information stored.
    context_predict: ContextPredict<IOPath, NodeValue, Statistics>,
}

impl<NodeValue> InstancesToProbabilities for PathPredict<NodeValue, Instances>
where
    NodeValue: Eq + Hash + Clone + Ord,
{
    type AsProbabilities = PathPredict<NodeValue, SymbolInfo>;
    fn instances_to_probabilities(&self, description: &str) -> PathPredict<NodeValue, SymbolInfo> {
        PathPredict {
            depth: self.depth,
            context_predict: self.context_predict.instances_to_probabilities(description),
        }
    }
}

impl<NodeValue, Statistics> PathPredict<NodeValue, Statistics>
where
    NodeValue: Eq + Hash + Clone,
{
    pub fn new(depth: usize) -> Self {
        PathPredict {
            depth,
            context_predict: ContextPredict::new(),
        }
    }

    /// The number of states in this predictor.
    pub fn len(&self) -> usize {
        self.context_predict.len()
    }

    /// The depth of this predictor.
    ///
    /// A depth of 1 means that the predictor uses only information on the `(interface, field)`
    /// to predict values. A depth of 2 means that the predictor also uses the `(interface, field)`
    /// of the parent interface, etc.
    pub fn depth(&self) -> usize {
        self.depth
    }

    /// All the paths known to this predictor.
    ///
    /// Used mainly for debugging.
    pub fn paths(&self) -> impl Iterator<Item = &IOPath> {
        self.context_predict.contexts()
    }

    /// Convert this predictor into an interator.
    pub fn into_iter(
        self,
    ) -> impl Iterator<Item = (IOPath, ContextInformation<NodeValue, Statistics>)> {
        self.context_predict.into_iter()
    }

    pub fn iter(
        &self,
    ) -> impl Iterator<Item = (&IOPath, &ContextInformation<NodeValue, Statistics>)> {
        self.context_predict.iter()
    }

    /// Return a tail of a path with the depth adapted to this predictor.
    fn tail<'a>(&self, path: &'a [IOPathItem]) -> &'a [IOPathItem] {
        Self::tail_of(path, self.depth)
    }

    /// Return a tail of a path with the specified depth.
    ///
    /// If the path is already shorter than the specified depth, return the path unchanged.
    fn tail_of<'a>(path: &'a [IOPathItem], depth: usize) -> &'a [IOPathItem] {
        let path = if path.len() <= depth {
            path
        } else {
            &path[path.len() - depth..]
        };
        path
    }

    pub fn iter_at(&self, path: &[IOPathItem]) -> impl Iterator<Item = (&NodeValue, &Statistics)> {
        let tail = self.tail(path);
        self.context_predict.iter_at(tail)
    }
}
impl<NodeValue> PathPredict<NodeValue, Instances>
where
    NodeValue: Eq + Hash + Clone,
{
    /// Register a value as being used at this path.
    pub fn add(&mut self, path: &[IOPathItem], value: NodeValue) {
        let tail = self.tail(path);
        let mut as_path = IOPath::new();
        as_path.extend_from_slice(tail);
        self.context_predict.add(as_path, value);
    }

    pub fn add_fallback(&mut self, other: &Self) {
        debug_assert!(
            !other.paths().any(|path| path.len() > 1),
            "The fallback dictionary should only contain paths of length 0 or 1."
        );

        debug!(target: "dictionary", "Adding fallback of length {}", other.len());
        // This dictionary already contains a number of paths, presumably
        // built from sampling. We need to make sure that each of these paths
        // maps to a number of instances > 0 to each value that may possibly
        // appear at this path.
        //
        // Therefore, we extend each known path with the values available in `other`.
        // Note that `other` only contains paths of length 0 or 1. We'll deal
        // with paths of length 0 in a further step. To deal with paths of length
        // 1, we must restrict ourselves to the tail of know paths.
        for (path, stats_by_node_value) in self.context_predict.iter_mut() {
            if path.len() == 0 {
                continue;
            }

            let tail = path.tail(1);
            for (value, statistics) in other.iter_at(tail) {
                debug_assert_eq!(Into::<usize>::into(*statistics), 1);
                stats_by_node_value.add_if_absent(value.clone());
            }
        }

        // If the sampling was insufficient, the dictionary may not know *all* possible paths.
        // We now insert wall the paths of `other` as (shorter) paths. The lookup methods know
        // that if they fail to find a path of full length, they should look for its length 1 tail.
        // This copy also handles any length 0 path.
        for (path, information) in other.iter() {
            for (value, statistics) in information.iter() {
                debug_assert_eq!(Into::<usize>::into(*statistics), 1);
                self.add_if_absent(path.borrow(), value.clone());
            }
        }
    }

    pub fn add_if_absent(&mut self, path: &[IOPathItem], value: NodeValue) {
        let tail = self.tail(path);
        let mut as_path = IOPath::new();
        as_path.extend_from_slice(tail);
        self.context_predict.add_if_absent(as_path, value);
    }
}
impl<NodeValue> PathPredict<NodeValue, SymbolInfo>
where
    NodeValue: Eq + Hash + Clone,
{
    /// Get a value by path and index.
    ///
    /// This method is only implemented when `Statistics=SymbolInfo` as the index is initialized
    /// by `instances_to_probabilities`.
    pub fn value_by_symbol_index(
        &self,
        path: &[IOPathItem],
        index: SymbolIndex,
    ) -> Option<&NodeValue> {
        if path.len() >= 2 {
            let candidates = [
                // Case 1: If the path has been encountered during sampling.
                self.tail(path),
                // Case 2: If the path has not been encountered during sampling, fallback to its length 1 suffix.
                Self::tail_of(path, 1),
            ];
            self.context_predict
                .value_by_symbol_index(&candidates, index)
        } else {
            // The path has length 0 or 1, there is only one way to represent it in this PathPredict.
            let candidates = [path];
            self.context_predict
                .value_by_symbol_index(&candidates, index)
        }
    }

    /// Get the stats for a specific value at a specific path.
    pub fn stats_by_node_value(&self, path: &[IOPathItem], value: &NodeValue) -> Option<&SymbolInfo>
    where
        NodeValue: std::fmt::Debug,
    {
        if path.len() >= 2 {
            let candidates = [
                // Case 1: If the path has been encountered during sampling.
                self.tail(path),
                // Case 2: If the path has not been encountered during sampling, fallback to its length 1 suffix.
                Self::tail_of(path, 1),
            ];
            self.context_predict.stats_by_node_value(&candidates, value)
        } else {
            // The path has length 0 or 1, there is only one way to represent it in this PathPredict.
            let candidates = [path];
            self.context_predict.stats_by_node_value(&candidates, value)
        }
    }
    pub fn stats_by_node_value_mut(
        &mut self,
        path: &[IOPathItem],
        value: &NodeValue,
    ) -> Option<&mut SymbolInfo>
    where
        NodeValue: std::fmt::Debug,
    {
        if path.len() >= 2 {
            let candidates = [
                // Case 1: If the path has been encountered during sampling.
                self.tail(path),
                // Case 2: If the path has not been encountered during sampling, fallback to its length 1 suffix.
                Self::tail_of(path, 1),
            ];
            self.context_predict
                .stats_by_node_value_mut(&candidates, value)
        } else {
            // The path has length 0 or 1, there is only one way to represent it in this PathPredict.
            let candidates = [path];
            self.context_predict
                .stats_by_node_value_mut(&candidates, value)
        }
    }

    /// Get frequency information for a given path.
    pub fn frequencies_at(
        &self,
        path: &[IOPathItem],
    ) -> Option<&Rc<RefCell<range_encoding::CumulativeDistributionFrequency>>> {
        if path.len() >= 2 {
            let candidates = [
                // Case 1: If the path has been encountered during sampling.
                self.tail(path),
                // Case 2: If the path has not been encountered during sampling, fallback to its length 1 suffix.
                Self::tail_of(path, 1),
            ];
            self.context_predict.frequencies_at(&candidates)
        } else {
            // The path has length 0 or 1, there is only one way to represent it in this PathPredict.
            let candidates = [path];
            self.context_predict.frequencies_at(&candidates)
        }
    }
}

/// An index for a value in `WindowPredict`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize, PartialOrd, Ord)]
enum WindowPrediction {
    /// A recently encountered value.
    ///
    /// `0` is the index of the latest value, `width - 1` is the index of the oldest
    /// value still in the window.
    ///
    /// Invariant: `0 <= BackReference < width`.
    BackReference(BackReference),

    /// An index into a global dictionary.
    DictionaryIndex(DictionaryIndex),
}

/// A prediction mechanism based on a sliding window.
///
/// Whenever encoding/decoding a value, if this value is one of the `width` latest
/// seen values, we use a backreference, otherwise, we use an index into a global
/// list of values. This strategy should work well when backreferences have a high
/// probability of happening.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct WindowPredict<NodeValue, Statistics>
where
    NodeValue: Clone + Eq + Hash,
{
    /// The window width.
    width: usize,

    /// The `width` values seen most recently, indexed by `BackReference`. Note that
    /// this vector starts empty and may contain fewer than `width` values. A value
    /// may never appear twice in this vector.
    latest_values: Vec<NodeValue>,

    /// A dictionary of all values encountered so far.
    /// Whenever we encounter a value that had previously not been encountered,
    /// we append it at the end. Values never move.
    value_by_dictionary_index: Vec<NodeValue>,

    /// A mapping from NodeValue to the `DictionaryIndex` used to represent them
    /// in `value_by_dictionary_index`. Mapping for a given value never changes.
    dictionary_index_by_value: HashMap<NodeValue, DictionaryIndex>,

    /// Actual statistics on values.
    info: ContextInformation<WindowPrediction, Statistics>,
}
impl<NodeValue, Statistics> WindowPredict<NodeValue, Statistics>
where
    NodeValue: Clone + Eq + Hash,
{
    /// Create a window predictor for a given window width.
    pub fn new(width: usize) -> Self {
        WindowPredict {
            width,
            value_by_dictionary_index: Vec::with_capacity(1024), // FIXME: Magic number
            dictionary_index_by_value: HashMap::with_capacity(1024),
            latest_values: Vec::with_capacity(width),
            info: ContextInformation::new(),
        }
    }

    /// Update current window by moving the value at index `index` to the latest-seen
    /// position (index 0).
    ///
    /// Fails if the index is not in [0, self.latest_values.len()[
    fn window_move_to_front(&mut self, index: BackReference) -> Result<(), ()> {
        let as_usize = Into::<usize>::into(index);
        if as_usize == 0 {
            // `rotate_right` doesn't work on an empty slice, so we need to handle
            // this case manually.
            return Ok(());
        }
        if as_usize >= self.latest_values.len() {
            return Err(());
        }
        let ref mut slice = self.latest_values.as_mut_slice()[0..as_usize];
        slice.rotate_right(1);
        Ok(())
    }

    /// Update current window by putting a value to the latest-seen position (index 0).
    ///
    /// If the value is already in the window, it is moved to the latest-seen position,
    /// otherwise, it is added directly in latest-seen position. If the resulting window
    /// is too large, it is truncated.
    fn window_insert_value(&mut self, value: &NodeValue) -> Option<BackReference> {
        // It's possible that the value was present in the window.
        if let Some(index) = self.latest_values.iter().position(|v| v == value) {
            let index = BackReference(index);
            self.window_move_to_front(index).unwrap(); // We have just checked that `index < self.latest_values.len()`.
            return Some(index);
        }
        if self.latest_values.len() < self.width {
            // If the window isn't full yet, simply add the value at start.
            self.latest_values.insert(0, value.clone());
        } else {
            // Otherwise, push front and remove last.
            let slice = self.latest_values.as_mut_slice();
            slice[slice.len() - 1] = value.clone();
            slice.rotate_right(1);
        }
        None
    }
}
impl<NodeValue> WindowPredict<NodeValue, Instances>
where
    NodeValue: Clone + Eq + std::hash::Hash + std::fmt::Debug,
{
    pub fn add(&mut self, value: NodeValue) {
        // --- At this stage, we don't know whether the value is known.

        debug!(target: "predict", "WindowPredict: Inserting value {:?}", value);
        let number_of_values = self.value_by_dictionary_index.len();
        let dictionary_index = *self
            .dictionary_index_by_value
            .entry(value.clone())
            .or_insert(DictionaryIndex(number_of_values));

        if dictionary_index == DictionaryIndex(number_of_values) {
            // We've just inserted `value`.
            self.value_by_dictionary_index.push(value.clone());
        } else {
            // Value was already known.
            let index: usize = dictionary_index.into();
            debug_assert_eq!(value, self.value_by_dictionary_index[index]);
        };

        // --- We are now sure that the value is known.

        // Update window.
        let symbol = match self.window_insert_value(&value) {
            Some(backref) => {
                // If the value was already in the window, we'll
                // favor this window, as this should give us a tighter
                // set of common symbols.
                WindowPrediction::BackReference(backref)
            }
            None => {
                // Otherwise, fallback to the global dictionary.
                WindowPrediction::DictionaryIndex(dictionary_index)
            }
        };
        self.info.add(symbol);
    }
}

impl<NodeValue> WindowPredict<NodeValue, SymbolInfo>
where
    NodeValue: Clone + Eq + std::hash::Hash + std::fmt::Debug,
{
    /// Return all the frequencies in the window.
    pub fn frequencies(
        &self,
    ) -> Option<&Rc<RefCell<range_encoding::CumulativeDistributionFrequency>>> {
        self.info
            .stats_by_node_value()
            .values()
            .next()
            .map(|any| &any.distribution)
    }

    /// Access value from its index.
    // FIXME: We should find a way to enforce a specific mapping between `index` and `WindowPredict`,
    // to make it easy to decode.
    pub fn value_by_symbol_index(&mut self, index: SymbolIndex) -> Option<NodeValue> {
        match self.info.value_by_symbol_index(index) {
            None => None,
            Some(&WindowPrediction::DictionaryIndex(dictionary_index)) => {
                // Global entry.
                let result = self
                    .value_by_dictionary_index
                    .get(dictionary_index.0)?
                    .clone();
                self.window_insert_value(&result);
                Some(result)
            }
            Some(&WindowPrediction::BackReference(index)) => {
                let as_usize: usize = index.into();
                let result = self.latest_values.get(as_usize)?.clone();
                if let Err(_) = self.window_move_to_front(index) {
                    return None;
                }
                Some(result)
            }
        }
    }

    /// Access information for a value.
    pub fn stats_by_node_value_mut(&mut self, value: &NodeValue) -> Option<&mut SymbolInfo> {
        // At this stage, the value may appear in both the dictionary
        // and the window. We'll favor the window if possible.
        debug!(target: "predict", "WindowPredict: Fetching {:?}", value);
        let prediction = match self.window_insert_value(value) {
            Some(backref) => WindowPrediction::BackReference(backref),
            None => {
                debug!(target: "predict", "WindowPredict: Value {:?} is not in the window, let's look for it in the dictionary", value);
                let index = self.dictionary_index_by_value.get(value)?.clone();
                WindowPrediction::DictionaryIndex(index)
            }
        };

        debug!(target: "predict", "WindowPredict: {:?} has just been inserted and will be encoded as {:?}", value, prediction);
        self.info.stats_by_node_value_mut().get_mut(&prediction)
    }
}

impl<NodeValue> InstancesToProbabilities for WindowPredict<NodeValue, Instances>
where
    NodeValue: Clone + Eq + Hash + Ord,
{
    type AsProbabilities = WindowPredict<NodeValue, SymbolInfo>;
    fn instances_to_probabilities(
        &self,
        _description: &str,
    ) -> WindowPredict<NodeValue, SymbolInfo> {
        WindowPredict {
            width: self.width,
            value_by_dictionary_index: self.value_by_dictionary_index.clone(),
            dictionary_index_by_value: self.dictionary_index_by_value.clone(),
            latest_values: Vec::with_capacity(self.width),
            info: self.info.instances_to_probabilities("WindowPredict::info"),
        }
    }
}