Skip to content

API documentation

making_histograms

generate_annotated_hists(*xs: Sequence[float], max_bins: int = 50, num_bins: Optional[int] = None, min_value: Optional[float] = None, max_value: Optional[float] = None, annotations: Annotations = Annotations(0), **named_xs: Sequence[float]) -> Generator[List[str], None, None]

Yields a histogram with annotations for each set of samples

Depending on the style, it yields a header first

It will ensure that all histograms are aligned (have the same number of bins and ranges).

:param xs: The sets of samples to make histograms from. :param max_bins: The maximum number of bins to use. Determines the maximum length of the histogram string. :param num_bins: The number of bins to use. If None, it will be computed from the data. :param min_value: Where the histogram should start. If None, it will be computed from the data. :param max_value: Where the histogram should end. If None, it will be computed from the data. :param annotations: The annotations to add to each histogram such as mean/std/min/max/n. :param named_xs: The sets of values to make histograms from. The keys are used as names. :raises ValueError: If num_bins > max_bins or min_value > max_value or both xs and named_xs were specified.

Source code in src/data_samples_printer/making_histograms.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def generate_annotated_hists(
    *xs: Sequence[float],
    max_bins: int = 50,
    num_bins: Optional[int] = None,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
    annotations: Annotations = Annotations(0),
    **named_xs: Sequence[float],
) -> Generator[List[str], None, None]:
    """Yields a histogram with annotations for each set of samples

    Each yielded row is a list of strings: the histogram string first, then one
    entry per requested annotation in the same column order that make_header uses
    (mean, std, n, min, max), and finally the name when named samples were given.
    This function does not yield a header row itself.

    It will ensure that all histograms are aligned (have the same number of bins and
    ranges).

    This is a generator function, so argument validation only runs once iteration
    starts.

    :param xs: The sets of samples to make histograms from.
    :param max_bins: The maximum number of bins to use. Determines the maximum length
    of the histogram string.
    :param num_bins: The number of bins to use. If None, it will be computed from the
    data.
    :param min_value: Where the histogram should start. If None, it will be computed
    from the data.
    :param max_value: Where the histogram should end. If None, it will be computed from
    the data.
    :param annotations: The annotations to add to each histogram such as
     mean/std/min/max/n.
    :param named_xs: The sets of values to make histograms from. The keys are used as
    names.
    :raises ValueError: If num_bins > max_bins or min_value > max_value
    or both xs and named_xs were specified.
    """
    if len(xs) > 0 and len(named_xs) > 0:
        raise ValueError(
            f"Can't have both unnamed and named arguments. "
            f"You passed {xs} and {named_xs}"
        )  # pragma: no cover

    names: Sequence[str]
    if len(named_xs) > 0:
        # Keys and values of the same dict iterate in matching order, so zipping
        # them below pairs every sample set with its own name.
        xs = cast(Tuple[Sequence[float]], named_xs.values())
        names = cast(Sequence[str], named_xs.keys())
        show_names = True
    else:
        names = [""] * len(xs)
        show_names = False

    for x, name, hist_str in zip(
        xs,
        names,
        generate_hists(
            *xs,
            max_bins=max_bins,
            num_bins=num_bins,
            max_value=max_value,
            min_value=min_value,
        ),
    ):
        hist_line = [hist_str]

        if Annotations.ADD_MEAN in annotations:
            hist_line.append(f"{np.mean(x):.2f}")

        if Annotations.ADD_STD in annotations:
            hist_line.append(f"{np.std(x):.2f}")

        if Annotations.ADD_NUM_VALUES in annotations:
            hist_line.append(f"{len(x)}")

        # The min/max columns keep each row aligned with the header produced by
        # make_header. np.min/np.max raise on empty input, so fall back to "nan",
        # which is what the mean/std columns render for an empty sample set.
        if Annotations.ADD_MIN in annotations:
            hist_line.append(f"{np.min(x):.2f}" if len(x) > 0 else "nan")

        if Annotations.ADD_MAX in annotations:
            hist_line.append(f"{np.max(x):.2f}" if len(x) > 0 else "nan")

        if show_names:
            hist_line.append(name)

        yield hist_line

generate_hists(*xs: Sequence[float], max_bins: int = 50, num_bins: Optional[int] = None, min_value: Optional[float] = None, max_value: Optional[float] = None) -> Generator[str, None, None]

Yields histogram strings for each set of samples.

It will ensure that all histograms are aligned (have the same number of bins and ranges).

:param xs: The sets of samples to make histograms from. :param max_bins: The maximum number of bins to use. Determines the maximum length of the yielded strings. :param num_bins: The number of bins to use. If None, it will be computed from the data. :param min_value: Where the histogram should start. If None, it will be computed from the data. :param max_value: Where the histogram should end. If None, it will be computed from the data. :raises ValueError: If num_bins > max_bins or min_value > max_value.

Source code in src/data_samples_printer/making_histograms.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def generate_hists(
    *xs: Sequence[float],
    max_bins: int = 50,
    num_bins: Optional[int] = None,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
) -> Generator[str, None, None]:
    """Yields one histogram string per set of samples.

    Every sample set is binned with the same number of bins over the same value
    range, so the yielded strings line up when printed underneath each other.
    Nothing is yielded when no sample sets are passed.

    :param xs: The sets of samples to make histograms from.
    :param max_bins: The maximum number of bins to use. Determines the maximum length
    of the yielded strings.
    :param num_bins: The number of bins to use. If None, it will be computed from the
    data.
    :param min_value: Where the histogram should start. If None, it will be computed
    from the data.
    :param max_value: Where the histogram should end. If None, it will be computed from
    the data.
    :raises ValueError: If num_bins > max_bins or min_value > max_value.
    """
    if not xs:
        return

    _check_for_sample_sizes_less_than_two(xs)

    # Bin count and range are shared by all sample sets to keep them aligned.
    bin_count = _deduce_num_bins(xs, max_bins=max_bins, num_bins=num_bins)
    lower, upper = _deduce_range(xs, min_value=min_value, max_value=max_value)

    for samples in xs:
        counts, _ = np.histogram(samples, bins=bin_count, range=(lower, upper))
        peak = np.max(counts)
        # Map counts from [0, peak] onto drawing-character indexes
        # [0, _NUM_DRAWING_CHARS - 1]; an all-empty histogram maps to index 0.
        scale = 0 if peak == 0 else (_NUM_DRAWING_CHARS - 1) / peak
        levels = np.round(counts * scale).astype(int)
        yield "".join(_DRAWING_CHARS[levels])

make_header(annotations: Annotations, add_names_column: bool) -> List[str]

Makes a header with the given annotations to print above annotated histograms.

Source code in src/data_samples_printer/making_histograms.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def make_header(annotations: Annotations, add_names_column: bool) -> List[str]:
    """Builds the header row to print above annotated histograms.

    The row always starts with "dist". One column title follows for every
    annotation flag present in ``annotations``, in the fixed order
    mean, std, n, min, max. A trailing "name" column is added on request.

    :param annotations: The annotation flags that select which columns appear.
    :param add_names_column: Whether to append the "name" column.
    :return: The column titles, in display order.
    """
    optional_columns = (
        (Annotations.ADD_MEAN, "mean"),
        (Annotations.ADD_STD, "std"),
        (Annotations.ADD_NUM_VALUES, "n"),
        (Annotations.ADD_MIN, "min"),
        (Annotations.ADD_MAX, "max"),
    )

    columns = ["dist"]
    columns.extend(title for flag, title in optional_columns if flag in annotations)

    if add_names_column:
        columns.append("name")

    return columns

Makes a footer with the min and max values of the given samples.

To be printed below an unannotated histogram.

:param xs: The sets of samples to make histograms from. :param width: The width of the histogram in chars. :param min_value: Where the histogram should start. If None, it will be computed from the data. :param max_value: Where the histogram should end. If None, it will be computed from the data.

Source code in src/data_samples_printer/making_histograms.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def make_min_max_footer(
    *xs: Sequence[float],
    width: int,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
    **named_xs: Sequence[float],
) -> str:
    """Builds a footer line showing the min and max values of the given samples.

    To be printed below an unannotated histogram. The min is placed on the left
    and the max on the right, both formatted with two decimals. The gap between
    them is ``width`` minus both labels minus 4 further characters, and is never
    smaller than one space. When no samples are given at all, the footer is
    ``width`` spaces.

    :param xs: The sets of samples to make histograms from.
    :param width: The width of the histogram in chars.
    :param min_value: Where the histogram should start. If None, it will be computed
    from the data.
    :param max_value: Where the histogram should end. If None, it will be computed from
    the data.
    :param named_xs: Named sets of samples. When any are given, their values are
    used in place of ``xs``.
    """
    if named_xs:
        xs = cast(tuple, named_xs.values())

    if not xs:
        return " " * width

    low, high = _deduce_range(xs, min_value=min_value, max_value=max_value)

    left_label = f"{low:.2f}"
    right_label = f"{high:.2f}"
    gap = max(1, width - len(left_label) - len(right_label) - 4)
    return (" " * gap).join((left_label, right_label))

printing_histograms