# Source code for openfisca_core.tracers.performance_log

from __future__ import annotations

import typing

import csv
import importlib.resources
import itertools
import json
import os

from openfisca_core import tracers

if typing.TYPE_CHECKING:
    # Aliases used only inside annotations. They are defined for type
    # checkers alone; this module relies on
    # ``from __future__ import annotations`` so they are never needed at
    # runtime.

    # Flat trace: maps a calculation key to its record. The code in this
    # module reads the "calculation_time" and "formula_time" entries of
    # each record.
    Trace = dict[str, dict]
    # A single flat-trace entry as a ``(key, record)`` pair.
    Calculation = tuple[str, dict]
    # Flat-trace entries once sorted, ready to be grouped.
    SortedTrace = list[Calculation]


class PerformanceLog:
    """Export the timings held by a full tracer as HTML and CSV reports."""

    def __init__(self, full_tracer: tracers.FullTracer) -> None:
        """Keep a reference to the tracer whose timings will be exported.

        Args:
            full_tracer: Tracer queried through ``trees`` and
                ``get_flat_trace()`` when a report is generated.
        """
        self._full_tracer = full_tracer

    def generate_graph(self, dir_path: str) -> None:
        """Write ``performance_graph.html`` into ``dir_path``.

        The page is the ``index.html`` template shipped in
        ``openfisca_core.scripts.assets`` with its ``{{data}}`` placeholder
        replaced by the JSON dump of the calculation tree.

        Args:
            dir_path: Existing directory that receives the HTML file.
        """
        # Build the whole page before touching the destination so that a
        # failure while loading the template or serialising the data does
        # not leave behind an empty or truncated report.
        template = importlib.resources.read_text(
            "openfisca_core.scripts.assets",
            "index.html",
        )
        perf_graph_html = template.replace(
            "{{data}}",
            json.dumps(self._json()),
        )

        # The template is decoded as UTF-8 (``read_text`` default), so it is
        # written back as UTF-8 rather than in the locale encoding.
        graph_path = os.path.join(dir_path, "performance_graph.html")
        with open(graph_path, "w", encoding="utf-8") as f:
            f.write(perf_graph_html)

    def generate_performance_tables(self, dir_path: str) -> None:
        """Write the detailed and the aggregated CSV tables into ``dir_path``.

        ``performance_table.csv`` holds one row per flat-trace entry;
        ``aggregated_performance_table.csv`` holds one row per group returned
        by :meth:`aggregate_calculation_times`. When the trace is empty both
        files are still written, containing only their header line.

        Args:
            dir_path: Existing directory that receives the CSV files.
        """
        flat_trace = self._full_tracer.get_flat_trace()

        csv_rows = [
            {
                "name": key,
                "calculation_time": trace["calculation_time"],
                "formula_time": trace["formula_time"],
            }
            for key, trace in flat_trace.items()
        ]
        self._write_csv(
            os.path.join(dir_path, "performance_table.csv"),
            csv_rows,
            ["name", "calculation_time", "formula_time"],
        )

        aggregated_csv_rows = [
            {"name": key, **aggregated_time}
            for key, aggregated_time in self.aggregate_calculation_times(
                flat_trace,
            ).items()
        ]
        self._write_csv(
            os.path.join(dir_path, "aggregated_performance_table.csv"),
            aggregated_csv_rows,
            [
                "name",
                "calculation_count",
                "calculation_time",
                "formula_time",
                "avg_calculation_time",
                "avg_formula_time",
            ],
        )

    def aggregate_calculation_times(
        self,
        flat_trace: Trace,
    ) -> dict[str, dict]:
        """Sum and average the timings of flat-trace entries, group by group.

        Entries are grouped on the part of their key that precedes the first
        ``"<"`` (presumably the variable name of a ``name<period>`` key --
        confirm against ``FullTracer.get_flat_trace``).

        Args:
            flat_trace: Mapping from calculation key to a record exposing
                ``"calculation_time"`` and ``"formula_time"``.

        Returns:
            A mapping from group name to a dict with the keys
            ``calculation_count``, ``calculation_time``, ``formula_time``,
            ``avg_calculation_time`` and ``avg_formula_time``. Every time is
            passed through ``tracers.TraceNode.round``.
        """

        def _aggregate_calculations(calculations: list) -> dict:
            # ``calculations`` always comes from a ``groupby`` group, which
            # holds at least one entry, so the divisions below are safe.
            calculation_count = len(calculations)
            calculation_time = sum(
                trace["calculation_time"] for _, trace in calculations
            )
            formula_time = sum(
                trace["formula_time"] for _, trace in calculations
            )

            return {
                "calculation_count": calculation_count,
                "calculation_time": tracers.TraceNode.round(calculation_time),
                "formula_time": tracers.TraceNode.round(formula_time),
                "avg_calculation_time": tracers.TraceNode.round(
                    calculation_time / calculation_count,
                ),
                "avg_formula_time": tracers.TraceNode.round(
                    formula_time / calculation_count,
                ),
            }

        def _groupby(calculation: Calculation) -> str:
            return calculation[0].split("<")[0]

        # ``groupby`` only merges adjacent items. Sorting the entries by key
        # puts all keys sharing the same text up to "<" next to each other.
        # Keys are unique, so the record dicts are never compared.
        all_calculations: SortedTrace = sorted(flat_trace.items())

        return {
            variable_name: _aggregate_calculations(list(calculations))
            for variable_name, calculations in itertools.groupby(
                all_calculations,
                _groupby,
            )
        }

    def _json(self) -> dict:
        """Return the root node of the JSON tree injected into the graph.

        The root is named ``"All calculations"``; its value is the sum of the
        values of the tracer's top-level trees, which become its children.
        """
        children = [self._json_tree(tree) for tree in self._full_tracer.trees]
        calculations_total_time = sum(child["value"] for child in children)

        return {
            "name": "All calculations",
            "value": calculations_total_time,
            "children": children,
        }

    def _json_tree(self, tree: tracers.TraceNode) -> dict:
        """Convert a trace node and, recursively, its children to a dict.

        Args:
            tree: Node exposing ``name``, ``period``, ``children`` and
                ``calculation_time()``.

        Returns:
            A dict with the node label (``name<period>``), its calculation
            time as ``value`` and the converted ``children``.
        """
        calculation_total_time = tree.calculation_time()
        children = [self._json_tree(child) for child in tree.children]

        return {
            "name": f"{tree.name}<{tree.period}>",
            "value": calculation_total_time,
            "children": children,
        }

    def _write_csv(
        self,
        path: str,
        rows: list[dict],
        fallback_fieldnames: list[str] | None = None,
    ) -> None:
        """Write ``rows`` to ``path`` as a CSV file with a header line.

        Args:
            path: Destination file; it is created or overwritten.
            rows: Rows to write. The columns are the keys of the first row.
            fallback_fieldnames: Columns to use for the header when ``rows``
                is empty. Ignored when ``rows`` has at least one row. If
                ``rows`` is empty and no fallback is given, an empty file is
                written.
        """
        if rows:
            fieldnames = list(rows[0].keys())
        elif fallback_fieldnames is not None:
            fieldnames = list(fallback_fieldnames)
        else:
            fieldnames = None

        # ``newline=""`` lets the csv module control line endings itself;
        # without it, platforms that translate "\n" emit "\r\r\n" rows.
        with open(path, "w", newline="") as csv_file:
            if fieldnames is None:
                # Nothing to describe: leave the freshly truncated file empty.
                return

            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)