/* ###
 * IP: GHIDRA
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ghidra.pcode.emu.jit;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.EnumSet;

import org.objectweb.asm.ClassWriter;

import ghidra.pcode.emu.jit.analysis.*;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassageClass;
import ghidra.pcode.exec.PcodeExecutorState;

/**
 * The Just-in-Time (JIT) translation engine that powers the {@link JitPcodeEmulator}.
 * 
 * <p>
 * This is the translation engine from "any" machine language into JVM bytecode. The same caveats
 * that apply to interpretation-based p-code emulation apply to JIT-accelerated emulation: Ghidra
 * must have a Sleigh specification for the emulation target language, there must be userop
 * libraries (built-in or user-provided) defining any userops encountered during the course of
 * execution, all dependent code must be loaded or stubbed out, etc.
 *
 * <p>
 * A passage is decoded at a desired entry point using the {@link JitPassageDecoder}. This compiler
 * then translates the passage into bytecode. It will produce a classfile which is then loaded and
 * returned to the emulator (or other client). The provided class will have three principal methods,
 * not counting getters: 1) The class initializer, which initializes static fields; 2) The
 * constructor, which takes a thread and initializes instance fields, and 3) The
 * {@link JitCompiledPassage#run(int) run} method, which comprises the actual translation. A static
 * field {@code ENTRIES} describes each entry point generated by the compiler. To execute the
 * passage starting at a given entry point, the emulation thread must retrieve the index of the
 * appropriate entry (i.e., address and contextreg value), instantiate the class, and then invoke
 * the run method, passing it the entry index. The translated passage will read variables from the
 * thread's {@link JitBytesPcodeExecutorState state} as needed, perform the equivalent operations as
 * expressed in the source p-code, and then write the resulting variables back into the state.
 * Memory variables are treated similarly, but without scope-based optimizations. In this manner,
 * execution of the translated passage produces exactly the same effect on the emulation state as
 * interpretation of the same p-code passage. The run method returns the next entry point to execute
 * or {@code null} when the emulator must look up the next entry point.
 *
 * <p>
 * Translation of a passage takes place in distinct phases. See each respective class for details of
 * its design and implementation:
 * 
 * <ol>
 * <li>Control Flow Analysis: {@link JitControlFlowModel}</li>
 * <li>Data Flow Analysis: {@link JitDataFlowModel}</li>
 * <li>Variable Scope Analysis: {@link JitVarScopeModel}</li>
 * <li>Type Assignment: {@link JitTypeModel}</li>
 * <li>Variable Allocation: {@link JitAllocationModel}</li>
 * <li>Operation Elimination: {@link JitOpUseModel}</li>
 * <li>Code Generation: {@link JitCodeGenerator}</li>
 * </ol>
 * 
 * <h2>Control Flow Analysis</h2>
 * <p>
 * Some rudimentary control flow analysis is performed during decode, but the output of decode is a
 * passage, i.e., collection of <em>strides</em>, not basic blocks. The control flow analysis breaks
 * each stride down into basic blocks at the p-code level. Note that a single instruction's p-code
 * (as well as any user instrumentation on that instruction's address) may have complex control
 * flow. Additionally, branches that leave an instruction preclude execution of its remaining
 * p-code. Thus, p-code basic blocks do not coincide precisely with instruction-level basic blocks.
 * See {@link JitControlFlowModel}.
 * 
 * <h2>Data Flow Analysis</h2>
 * <p>
 * Most every following step consumes the control flow analysis. Data flow analysis interprets each
 * basic block independently using an abstraction that produces a use-def graph. A varnode that is
 * read before it is written produces a "missing" variable. Those missing variables are converted to
 * <em>phi</em> nodes and later resolved during inter-block analysis. The graph is also able to
 * consider aliasing, partial accesses, overlapping accesses, etc., by synthesizing operations to
 * model those effects. See {@link JitDataFlowModel}.
 * 
 * <h2>Variable Scope Analysis</h2>
 * <p>
 * Because accessing {@link PcodeExecutorState} is expensive (relative to accessing a JVM local
 * variable), the translation seeks to minimize such accesses. This is generally not recommended for
 * memory accesses, as there is no telling in multi-threaded applications whether a given memory
 * variable is shared/volatile or not. However, for registers and uniques, we can allocate the
 * variables as JVM locals. Then we only "birth" them (read them in) when they come into scope and
 * "retire" them (write them out) when they leave scope. This analyzer determines which variables
 * are in scope (alive) in which basic blocks. See {@link JitVarScopeModel}.
 * 
 * <h2>Type Assignment</h2>
 * <p>
 * For those variables we allocate as JVM locals, we have to choose a type, because the JVM requires
 * it. We have essentially 4 to choose from. (Though we could also choose a <em>reference</em> type,
 * depending on the strategy we eventually choose for multi-precision arithmetic.) Those four are
 * the JVM primitives: int, float, long, and double. For those more familiar with Java but not the
 * JVM, the smaller integral primitives are all represented by JVM ints. The JVM does not permit
 * type confusion, e.g., the application of float addition {@code FADD} to int variables. However,
 * the emulation target may permit type confusion. (Those familiar with the constant 0x5f3759df may
 * appreciate intentional type confusion.) When this happens, we must explicitly convert by calling,
 * e.g., {@link Float#floatToRawIntBits(float)}, which is essentially just a bit cast. Nevertheless,
 * we seek to reduce the number of such calls we encode into the translation. See
 * {@link JitTypeModel}.
 * 
 * <h2>Variable Allocation</h2>
 * <p>
 * Once we've decided the type of each use-def variable node, we allocate JVM locals and assign
 * their types accordingly. To keep things simple and fast, we just allocate variables by varnode.
 * Partial/overlapping accesses are coalesced to the containing varnode and cause the type to be a
 * JVM int (to facilitate shifting and masking). Otherwise, types are assigned according to the most
 * common use of the varnode, i.e., by taking a vote among the use-def variable nodes sharing that
 * varnode. See {@link JitAllocationModel}.
 * 
 * <h2>Operation Elimination</h2>
 * <p>
 * Each instruction typically produces several p-code ops, the outputs of which may not actually be
 * used by any subsequent op. This analysis seeks to identify such p-code ops and remove them. Since
 * many ISAs employ "flags," which are set by nearly every arithmetic instruction, such ops are
 * incredibly common. Worse yet, their computation is very expensive, because the JVM does not have
 * comparable flag registers, nor does it provide opcodes for producing comparable values. We have
 * to emit the bit banging operations ourselves. Thus, performing this elimination stands to improve
 * execution speed significantly. However, eliminating these operations may lead to confusing
 * results if execution is interrupted and the state inspected by a user. The effects of the
 * eliminated operations will be missing. Even though they do not (or should not) matter, the user
 * may expect to see them. Thus, this step can be toggled by
 * {@link JitConfiguration#removeUnusedOperations()}. See {@link JitOpUseModel}.
 * 
 * <h2>Code Generation</h2>
 * <p>
 * For simplicity, we seek to generate JVM bytecode in the same order as the source p-code ops.
 * There are several details given the optimizations informed by all the preceding analysis. For
 * example, the transfer of control to the requested entry point, the placement of variable birth
 * and retirement on control flow edges (including fall-through).... We take an object-oriented
 * approach to the translation of each p-code op, the handling of each variable's allocation and
 * access, the conversion of types, etc. This phase outputs the final classfile bytes, which are
 * then loaded as a hidden class. See {@link JitCodeGenerator}.
 * 
 * @implNote There are static fields in this class for configuring diagnostics. They are meant to be
 *           modified only temporarily by developers seeking to debug issues in the translation
 *           engine.
 */
public class JitCompiler {
	/**
	 * Switches for the developer-facing diagnostics of the translation engine
	 */
	public enum Diag {
		/** Dump every passage (its instructions and p-code ops) prior to translation */
		PRINT_PASSAGE,
		/** Dump each basic block's p-code along with the flows/branches among the blocks */
		PRINT_CFM,
		/** Dump each basic block's ops in (roughly) SSA form */
		PRINT_DFM,
		/** Dump the live variables of each basic block */
		PRINT_VSM,
		/** Dump every synthetic operation, e.g., catenation, subpiece, phi */
		PRINT_SYNTH,
		/** Dump every op that was eliminated */
		PRINT_OUM,
		/** Turn on ASM's trace for every generated classfile */
		TRACE_CLASS,
		/** Write each generated {@code .class} file to disk so it can be examined offline */
		DUMP_CLASS
	}

	/**
	 * The diagnostic toggles that are currently switched on.
	 * 
	 * <p>
	 * This ought to be empty in production.
	 */
	public static final EnumSet<Diag> ENABLE_DIAGNOSTICS = EnumSet.noneOf(Diag.class);

	/**
	 * An address offset to exempt from ASM's {@link ClassWriter#COMPUTE_MAXS} and
	 * {@link ClassWriter#COMPUTE_FRAMES}.
	 * 
	 * <p>
	 * When ASM's automatic computation of frames and maxes fails, the library provides little in
	 * the way of diagnostics: it typically just crashes with an NPE or an AIOOBE and, worse, emits
	 * none of the classfile trace. As a workaround, a developer may find the address of the
	 * passage seed that triggers the failure and assign its offset to this variable. ASM is then
	 * kept from attempting the computation, so that the trace is at least printed and the
	 * classfile dumped to disk (provided those {@link Diag}nostics are enabled).
	 * 
	 * <p>
	 * After obtaining the trace/classfile, restore this to -1 and add debug prints to the crashing
	 * method. As that method is probably inside the ASM library, the prints will need to be
	 * injected using an IDE / debugger. In Eclipse, set a "conditional breakpoint" whose
	 * condition prints the value and returns false, so that execution carries on. This still
	 * slows execution considerably, so consider a second conditional breakpoint that catches the
	 * moment the troublesome passage is being translated. The most useful thing to print is
	 * probably the bytecode offset of each basic block ASM processes while computing the frames.
	 * After the crash, examine the last few of those offsets in the dumped classfile.
	 */
	public static final long EXCLUDE_MAXS = -1L;

	/**
	 * The configuration of the JIT emulator
	 */
	private final JitConfiguration config;

	/**
	 * Create a translator from p-code to JVM bytecode.
	 * 
	 * <p>
	 * Generally, only the JIT emulator and its test suite should need to call this.
	 * 
	 * @param config the configuration
	 */
	public JitCompiler(JitConfiguration config) {
		this.config = config;
	}

	/**
	 * Check whether a diagnostic toggle is switched on
	 * 
	 * @param diag the toggle to check
	 * @return true if the toggle is present in {@link #ENABLE_DIAGNOSTICS}
	 */
	private static boolean isEnabled(Diag diag) {
		return ENABLE_DIAGNOSTICS.contains(diag);
	}

	/**
	 * Compile a decoded passage into a loaded class, using the given lookup
	 * 
	 * <p>
	 * The analyses are run in a fixed order, since each model is constructed from the context
	 * and/or the models built before it. Whenever the corresponding {@link Diag} toggle is
	 * enabled, an intermediate result is dumped along the way.
	 * 
	 * @param lookup a lookup with access to everything the passage may need, e.g., userop
	 *            libraries. This should likely be provided by the emulator, which may reside in
	 *            a script. When in doubt, use {@link MethodHandles#lookup()}. Should errors about
	 *            accessing things appear during compilation, make sure that everything the
	 *            emulator needs is accessible from the method that calls
	 *            {@link MethodHandles#lookup()}.
	 * @param passage the decoded passage to compile
	 * @return the compiled class, not instantiated for any particular thread
	 */
	public JitCompiledPassageClass compilePassage(Lookup lookup, JitPassage passage) {
		if (isEnabled(Diag.PRINT_PASSAGE)) {
			System.err.println(passage);
		}

		JitAnalysisContext context = new JitAnalysisContext(config, passage);

		// Control flow
		JitControlFlowModel cfm = new JitControlFlowModel(context);
		if (isEnabled(Diag.PRINT_CFM)) {
			cfm.dumpResult();
		}

		// Data flow
		JitDataFlowModel dfm = new JitDataFlowModel(context, cfm);
		if (isEnabled(Diag.PRINT_DFM)) {
			dfm.dumpResult();
		}

		// Variable scope
		JitVarScopeModel vsm = new JitVarScopeModel(cfm, dfm);
		if (isEnabled(Diag.PRINT_VSM)) {
			vsm.dumpResult();
		}

		// Types, allocation, and op use
		JitTypeModel tm = new JitTypeModel(dfm);
		JitAllocationModel am = new JitAllocationModel(context, dfm, vsm, tm);
		JitOpUseModel oum = new JitOpUseModel(context, cfm, dfm, vsm);

		// The synthetic ops are only dumped once all of the analyses above have been constructed
		if (isEnabled(Diag.PRINT_SYNTH)) {
			dfm.dumpSynth();
		}
		if (isEnabled(Diag.PRINT_OUM)) {
			oum.dumpResult();
		}

		// Code generation and class loading
		JitCodeGenerator<?> generator =
			new JitCodeGenerator<>(lookup, context, cfm, dfm, vsm, tm, am, oum);
		return generator.load();
	}

	/**
	 * Retrieve the configuration this compiler was constructed with
	 * 
	 * @return the configuration
	 */
	public JitConfiguration getConfiguration() {
		return config;
	}
}
