TEMPLATE = app
CONFIG += qt
HEADERS += TriangleFractalChaos.h
SOURCES += main.cpp TriangleFractalChaos.cpp
/* SPDX-License-Identifier: GPL-2.0 */
/* eBPF instruction mini library */
#ifndef __BPF_INSN_H
#define __BPF_INSN_H
#include <linux/bpf.h>

struct bpf_insn;
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
#define BPF_ALU64_REG(OP, DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
#define BPF_ALU32_REG(OP, DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
#define BPF_ALU64_IMM(OP, DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
#define BPF_ALU32_IMM(OP, DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* Short form of mov, dst_reg = src_reg */
#define BPF_MOV64_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
#define BPF_MOV32_REG(DST, SRC) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = 0 })
/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
#define BPF_MOV32_IMM(DST, IMM) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_MOV | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn, which
 * occupies two struct bpf_insn slots */
#define BPF_LD_IMM64(DST, IMM) \
BPF_LD_IMM64_RAW(DST, 0, IMM)
#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
((struct bpf_insn) { \
.code = BPF_LD | BPF_DW | BPF_IMM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = 0, \
.imm = (__u32) (IMM) }), \
((struct bpf_insn) { \
.code = 0, /* zero is reserved opcode */ \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = ((__u64) (IMM)) >> 32 })
#ifndef BPF_PSEUDO_MAP_FD
# define BPF_PSEUDO_MAP_FD 1
#endif
/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
#define BPF_LD_MAP_FD(DST, MAP_FD) \
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
#define BPF_LD_ABS(SIZE, IMM) \
((struct bpf_insn) { \
.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = IMM })
/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
((struct bpf_insn) { \
.code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = 0 })
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
.off = OFF, \
.imm = IMM })
/* Raw code statement block */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
((struct bpf_insn) { \
.code = CODE, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
.imm = IMM })
/* Program exit */
#define BPF_EXIT_INSN() \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_EXIT, \
.dst_reg = 0, \
.src_reg = 0, \
.off = 0, \
.imm = 0 })
#endif
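As a sketch of how these macros compose, here is a minimal, hypothetical loader (not part of the header: it assumes a userspace build with <linux/bpf.h> and the raw bpf(2) syscall) for the smallest program the verifier accepts, which just returns 0:

#include <linux/bpf.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include "bpf_insn.h"

static int load_return_zero(void)
{
	/* r0 = 0; exit */
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (__u64)(unsigned long)prog;
	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
	attr.license = (__u64)(unsigned long)"GPL";
	/* Returns a program fd on success, -1 with errno on failure. */
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}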
# Go Implementation of [WireGuard](https://www.wireguard.com/)
This is an implementation of WireGuard in Go.
## Usage
Most Linux kernel WireGuard users are used to adding an interface with `ip link add wg0 type wireguard`. With wireguard-go, instead simply run:
```
$ wireguard-go wg0
```
This will create an interface and fork into the background. To remove the interface, use the usual `ip link del wg0`, or if your system does not support removing interfaces directly, you may instead remove the control socket via `rm -f /var/run/wireguard/wg0.sock`, which will result in wireguard-go shutting down.
To run wireguard-go without forking to the background, pass `-f` or `--foreground`:
```
$ wireguard-go -f wg0
```
When an interface is running, you may use [`wg(8)`](https://git.zx2c4.com/wireguard-tools/about/src/man/wg.8) to configure it, as well as the usual `ip(8)` and `ifconfig(8)` commands.
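For example, with hypothetical addresses and keys in place of the placeholders below, a typical bring-up might look like:

```
$ wireguard-go wg0
$ wg set wg0 listen-port 51820 private-key ./private.key \
      peer <peer-public-key-base64> endpoint 192.0.2.1:51820 allowed-ips 10.0.0.2/32
$ ip address add 10.0.0.1/24 dev wg0
$ ip link set wg0 up
```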
To run with more logging you may set the environment variable `LOG_LEVEL=debug`.
## Platforms
### Linux
This will run on Linux; however, you should instead use the kernel module, which is faster and better integrated into the OS. See the [installation page](https://www.wireguard.com/install/) for instructions.
### macOS
This runs on macOS using the utun driver. It does not yet support sticky sockets, and won't support fwmarks because of Darwin limitations. Since the utun driver cannot have arbitrary interface names, you must either use `utun[0-9]+` for an explicit interface name or `utun` to have the kernel select one for you. If you choose `utun` as the interface name, and the environment variable `WG_TUN_NAME_FILE` is defined, then the actual name of the interface chosen by the kernel is written to the file specified by that variable.
### Windows
This runs on Windows, but you should instead use it from the more [fully featured Windows app](https://git.zx2c4.com/wireguard-windows/about/), which uses this as a module.
### FreeBSD
This will run on FreeBSD. It does not yet support sticky sockets. Fwmark is mapped to `SO_USER_COOKIE`.
### OpenBSD
This will run on OpenBSD. It does not yet support sticky sockets. Fwmark is mapped to `SO_RTABLE`. Since the tun driver cannot have arbitrary interface names, you must either use `tun[0-9]+` for an explicit interface name or `tun` to have the program select one for you. If you choose `tun` as the interface name, and the environment variable `WG_TUN_NAME_FILE` is defined, then the actual name of the interface chosen by the kernel is written to the file specified by that variable.
## Building
This requires an installation of the latest version of [Go](https://go.dev/).
```
$ git clone https://git.zx2c4.com/wireguard-go
$ cd wireguard-go
$ make
```
## License
Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
### i915-watt
This tool displays one-second samples of the Intel graphics card's power usage.
```
$ git clone https://git.zx2c4.com/i915-watt && cd i915-watt
$ make
$ sudo ./i915-watt
```
/* ui-summary.c: functions for generating repo summary page
*
* Copyright (C) 2006-2014 cgit Development Team <cgit@lists.zx2c4.com>
*
* Licensed under GNU General Public License v2
* (see COPYING for full license text)
*/
#include "cgit.h"
#include "ui-summary.h"
#include "html.h"
#include "ui-blob.h"
#include "ui-log.h"
#include "ui-plain.h"
#include "ui-refs.h"
#include "ui-shared.h"
static int urls;
static void print_url(const char *url)
{
int columns = 3;
if (ctx.repo->enable_log_filecount)
columns++;
if (ctx.repo->enable_log_linecount)
columns++;
if (urls++ == 0) {
htmlf("<tr class='nohover'><td colspan='%d'> </td></tr>", columns);
htmlf("<tr class='nohover'><th class='left' colspan='%d'>Clone</th></tr>\n", columns);
}
htmlf("<tr><td colspan='%d'><a rel='vcs-git' href='", columns);
html_url_path(url);
html("' title='");
html_attr(ctx.repo->name);
html(" Git repository'>");
html_txt(url);
html("</a></td></tr>\n");
}
void cgit_print_summary(void)
{
int columns = 3;
if (ctx.repo->enable_log_filecount)
columns++;
if (ctx.repo->enable_log_linecount)
columns++;
cgit_print_layout_start();
html("<table summary='repository info' class='list nowrap'>");
cgit_print_branches(ctx.cfg.summary_branches);
htmlf("<tr class='nohover'><td colspan='%d'> </td></tr>", columns);
cgit_print_tags(ctx.cfg.summary_tags);
if (ctx.cfg.summary_log > 0) {
htmlf("<tr class='nohover'><td colspan='%d'> </td></tr>", columns);
cgit_print_log(ctx.qry.head, 0, ctx.cfg.summary_log, NULL,
NULL, NULL, 0, 0, 0);
}
urls = 0;
cgit_add_clone_urls(print_url);
html("</table>");
cgit_print_layout_end();
}
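/* Example (hypothetical values): with a configured readme of
 * "docs/README.md", an about path of "img/diagram.png" resolves to
 * "docs/img/diagram.png"; for a top-level readme (dirname "."), the path
 * is used as-is. Filesystem readmes (no ref) are additionally vetted with
 * realpath() so the result cannot escape the readme's directory. */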
/* The caller must free the return value. */
static char* append_readme_path(const char *filename, const char *ref, const char *path)
{
char *file, *base_dir, *full_path, *resolved_base = NULL, *resolved_full = NULL;
/* If a subpath is specified for the about page, make it relative
* to the directory containing the configured readme. */
file = xstrdup(filename);
base_dir = dirname(file);
if (!strcmp(base_dir, ".") || !strcmp(base_dir, "..")) {
if (!ref) {
free(file);
return NULL;
}
full_path = xstrdup(path);
} else
full_path = fmtalloc("%s/%s", base_dir, path);
if (!ref) {
resolved_base = realpath(base_dir, NULL);
resolved_full = realpath(full_path, NULL);
if (!resolved_base || !resolved_full || !starts_with(resolved_full, resolved_base)) {
free(full_path);
full_path = NULL;
}
}
free(file);
free(resolved_base);
free(resolved_full);
return full_path;
}
void cgit_print_repo_readme(const char *path)
{
char *filename, *ref, *mimetype;
int free_filename = 0;
mimetype = get_mimetype_for_filename(path);
if (mimetype && (!strncmp(mimetype, "image/", 6) || !strncmp(mimetype, "video/", 6))) {
ctx.page.mimetype = mimetype;
ctx.page.charset = NULL;
cgit_print_plain();
free(mimetype);
return;
}
free(mimetype);
cgit_print_layout_start();
if (ctx.repo->readme.nr == 0)
goto done;
filename = ctx.repo->readme.items[0].string;
ref = ctx.repo->readme.items[0].util;
if (path) {
free_filename = 1;
filename = append_readme_path(filename, ref, path);
if (!filename)
goto done;
}
/* Print the calculated readme, either from the git repo or from the
* filesystem, while applying the about-filter.
*/
html("<div id='summary'>");
cgit_open_filter(ctx.repo->about_filter, filename);
if (ref)
cgit_print_file(filename, ref, 1);
else
html_include(filename);
cgit_close_filter(ctx.repo->about_filter);
html("</div>");
if (free_filename)
free(filename);
done:
cgit_print_layout_end();
}
#include <stdint.h>

#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif
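/*
 * The functions below implement arithmetic in GF(2^255 - 19), the Curve25519
 * field, with elements stored as four little-endian 64-bit limbs. Because
 * 2^256 = 2*(2^255 - 19) + 38, we have 2^256 == 38 (mod p), so any overflow
 * out of the fourth limb can be folded back in by multiplying it by 38;
 * that is the "carry*38" step that recurs throughout the assembly below.
 */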
/* Computes the addition of four-element f1 with value in f2
* and returns the carry (if any) */
static uint64_t add_scalar(uint64_t *out, const uint64_t *f1, uint64_t f2)
{
uint64_t carry_r;
asm volatile(
/* Clear registers to propagate the carry bit */
" xor %%r8, %%r8;"
" xor %%r9, %%r9;"
" xor %%r10, %%r10;"
" xor %%r11, %%r11;"
" xor %1, %1;"
/* Begin addition chain */
" addq 0(%3), %0;"
" movq %0, 0(%2);"
" adcxq 8(%3), %%r8;"
" movq %%r8, 8(%2);"
" adcxq 16(%3), %%r9;"
" movq %%r9, 16(%2);"
" adcxq 24(%3), %%r10;"
" movq %%r10, 24(%2);"
/* Return the carry bit in a register */
" adcx %%r11, %1;"
: "+&r" (f2), "=&r" (carry_r)
: "r" (out), "r" (f1)
: "%r8", "%r9", "%r10", "%r11", "memory", "cc"
);
return carry_r;
}
/* Computes the field addition of two field elements */
static void fadd(uint64_t *out, const uint64_t *f1, const uint64_t *f2)
{
asm volatile(
/* Compute the raw addition of f1 + f2 */
" movq 0(%0), %%r8;"
" addq 0(%2), %%r8;"
" movq 8(%0), %%r9;"
" adcxq 8(%2), %%r9;"
" movq 16(%0), %%r10;"
" adcxq 16(%2), %%r10;"
" movq 24(%0), %%r11;"
" adcxq 24(%2), %%r11;"
/* Wrap the result back into the field */
/* Step 1: Compute carry*38 */
" mov $0, %%rax;"
" mov $38, %0;"
" cmovc %0, %%rax;"
/* Step 2: Add carry*38 to the original sum */
" xor %%rcx, %%rcx;"
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%1);"
" adcx %%rcx, %%r10;"
" movq %%r10, 16(%1);"
" adcx %%rcx, %%r11;"
" movq %%r11, 24(%1);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %0, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%1);"
: "+&r" (f2)
: "r" (out), "r" (f1)
: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
);
}
/* Computes the field subtraction of two field elements */
static void fsub(uint64_t *out, const uint64_t *f1, const uint64_t *f2)
{
asm volatile(
/* Compute the raw subtraction of f1-f2 */
" movq 0(%1), %%r8;"
" subq 0(%2), %%r8;"
" movq 8(%1), %%r9;"
" sbbq 8(%2), %%r9;"
" movq 16(%1), %%r10;"
" sbbq 16(%2), %%r10;"
" movq 24(%1), %%r11;"
" sbbq 24(%2), %%r11;"
/* Wrap the result back into the field */
/* Step 1: Compute carry*38 */
" mov $0, %%rax;"
" mov $38, %%rcx;"
" cmovc %%rcx, %%rax;"
/* Step 2: Subtract carry*38 from the original difference */
" sub %%rax, %%r8;"
" sbb $0, %%r9;"
" sbb $0, %%r10;"
" sbb $0, %%r11;"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rcx, %%rax;"
" sub %%rax, %%r8;"
/* Store the result */
" movq %%r8, 0(%0);"
" movq %%r9, 8(%0);"
" movq %%r10, 16(%0);"
" movq %%r11, 24(%0);"
:
: "r" (out), "r" (f1), "r" (f2)
: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
);
}
/* Computes a field multiplication: out <- f1 * f2
* Uses the 8-element buffer tmp for intermediate results */
static void fmul(uint64_t *out, const uint64_t *f1, const uint64_t *f2, uint64_t *tmp)
{
asm volatile(
/* Compute the raw multiplication: tmp <- src1 * src2 */
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
/* Line up pointers */
" mov %0, %1;"
" mov %2, %0;"
/* Wrap the result back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 24(%1), %%r11;"
" adcx %3, %%rax;"
" adox %3, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %3, %%r9;"
" movq %%r9, 8(%0);"
" adcx %3, %%r10;"
" movq %%r10, 16(%0);"
" adcx %3, %%r11;"
" movq %%r11, 24(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%0);"
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
:
: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
);
}
/* Computes two field multiplications:
* out[0] <- f1[0] * f2[0]
* out[1] <- f1[1] * f2[1]
* Uses the 16-element buffer tmp for intermediate results. */
static void fmul2(uint64_t *out, const uint64_t *f1, const uint64_t *f2, uint64_t *tmp)
{
asm volatile(
/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
" mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
/* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
/* Compute src1[0] * src2 */
" movq 32(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
" mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 40(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
" mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 48(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
" mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 56(%1), %%rdx;"
" mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
" mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
" mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
/* Line up pointers */
" mov %0, %1;"
" mov %2, %0;"
/* Wrap the results back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 24(%1), %%r11;"
" adcx %3, %%rax;"
" adox %3, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %3, %%r9;"
" movq %%r9, 8(%0);"
" adcx %3, %%r10;"
" movq %%r10, 16(%0);"
" adcx %3, %%r11;"
" movq %%r11, 24(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%0);"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
" xor %3, %3;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 72(%1), %%r9;"
" mulxq 112(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 80(%1), %%r10;"
" mulxq 120(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 88(%1), %%r11;"
" adcx %3, %%rax;"
" adox %3, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %3, %%r9;"
" movq %%r9, 40(%0);"
" adcx %3, %%r10;"
" movq %%r10, 48(%0);"
" adcx %3, %%r11;"
" movq %%r11, 56(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 32(%0);"
: "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
:
: "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
);
}
/* Computes the field multiplication of four-element f1 with value in f2 */
static void fmul_scalar(uint64_t *out, const uint64_t *f1, uint64_t f2)
{
register uint64_t f2_r asm("rdx") = f2;
asm volatile(
/* Compute the raw multiplication of f1*f2 */
" mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
" mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
" add %%rcx, %%r9;"
" mov $0, %%rcx;"
" mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
" adcx %%r12, %%r10;"
" mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
" adcx %%r13, %%r11;"
" adcx %%rcx, %%rax;"
/* Wrap the result back into the field */
/* Step 1: Compute carry*38 */
" mov $38, %%rdx;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%1);"
" adcx %%rcx, %%r10;"
" movq %%r10, 16(%1);"
" adcx %%rcx, %%r11;"
" movq %%r11, 24(%1);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%1);"
: "+&r" (f2_r)
: "r" (out), "r" (f1)
: "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
);
}
/* Conditionally swaps the eight limbs of p1 and p2 (two field elements
 * each) in constant time when bit is 1 */
static void cswap2(uint64_t bit, const uint64_t *p1, const uint64_t *p2)
{
asm volatile(
/* Move bit into the carry flag: adding 2^64 - 1 sets CF iff bit == 1 */
" add $18446744073709551615, %0;"
/* cswap p1[0], p2[0] */
" movq 0(%1), %%r8;"
" movq 0(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 0(%1);"
" movq %%r9, 0(%2);"
/* cswap p1[1], p2[1] */
" movq 8(%1), %%r8;"
" movq 8(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 8(%1);"
" movq %%r9, 8(%2);"
/* cswap p1[2], p2[2] */
" movq 16(%1), %%r8;"
" movq 16(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 16(%1);"
" movq %%r9, 16(%2);"
/* cswap p1[3], p2[3] */
" movq 24(%1), %%r8;"
" movq 24(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 24(%1);"
" movq %%r9, 24(%2);"
/* cswap p1[4], p2[4] */
" movq 32(%1), %%r8;"
" movq 32(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 32(%1);"
" movq %%r9, 32(%2);"
/* cswap p1[5], p2[5] */
" movq 40(%1), %%r8;"
" movq 40(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 40(%1);"
" movq %%r9, 40(%2);"
/* cswap p1[6], p2[6] */
" movq 48(%1), %%r8;"
" movq 48(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 48(%1);"
" movq %%r9, 48(%2);"
/* cswap p1[7], p2[7] */
" movq 56(%1), %%r8;"
" movq 56(%2), %%r9;"
" mov %%r8, %%r10;"
" cmovc %%r9, %%r8;"
" cmovc %%r10, %%r9;"
" movq %%r8, 56(%1);"
" movq %%r9, 56(%2);"
: "+&r" (bit)
: "r" (p1), "r" (p2)
: "%r8", "%r9", "%r10", "memory", "cc"
);
}
/* Computes the square of a field element: out <- f * f
* Uses the 8-element buffer tmp for intermediate results */
static void fsqr(uint64_t *out, const uint64_t *f, uint64_t *tmp)
{
asm volatile(
/* Compute the raw multiplication: tmp <- f * f */
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
" mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
" mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
" mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15, %%r15;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
" adox %%r15, %%r12;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
" adcx %%r12, %%r12;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
" movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
" movq %%rax, 0(%0);"
" add %%rcx, %%r8;" " movq %%r8, 8(%0);"
" movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
" adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
" adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
/* Line up pointers */
" mov %0, %1;"
" mov %2, %0;"
/* Wrap the result back into the field */
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 24(%1), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%0);"
" adcx %%rcx, %%r10;"
" movq %%r10, 16(%0);"
" adcx %%rcx, %%r11;"
" movq %%r11, 24(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%0);"
: "+&r" (tmp), "+&r" (f), "+&r" (out)
:
: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
);
}
/* Computes two field squarings:
* out[0] <- f[0] * f[0]
* out[1] <- f[1] * f[1]
* Uses the 16-element buffer tmp for intermediate results */
static void fsqr2(uint64_t *out, const uint64_t *f, uint64_t *tmp)
{
asm volatile(
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
" mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
" mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
" mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
" movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15, %%r15;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
" adox %%r15, %%r12;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
" adcx %%r12, %%r12;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
" movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
" movq %%rax, 0(%0);"
" add %%rcx, %%r8;" " movq %%r8, 8(%0);"
" movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
" adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
" adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
" movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
" adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
" movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
/* Step 1: Compute all partial products */
" movq 32(%1), %%rdx;" /* f[0] */
" mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 56(%1), %%rdx;" /* f[3] */
" mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
" mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
" movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
" xor %%r15, %%r15;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
" adcx %%r9, %%r9;"
" adox %%r15, %%r12;"
" adcx %%r10, %%r10;"
" adox %%r15, %%r13;"
" adcx %%r11, %%r11;"
" adox %%r15, %%r14;"
" adcx %%r12, %%r12;"
" adcx %%r13, %%r13;"
" adcx %%r14, %%r14;"
/* Step 3: Compute intermediate squares */
" movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
" movq %%rax, 64(%0);"
" add %%rcx, %%r8;" " movq %%r8, 72(%0);"
" movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
" adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
" adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
" movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
" adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
" adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
" movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
" adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
" adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
/* Line up pointers */
" mov %0, %1;"
" mov %2, %0;"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 8(%1), %%r9;"
" mulxq 48(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 16(%1), %%r10;"
" mulxq 56(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 24(%1), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%0);"
" adcx %%rcx, %%r10;"
" movq %%r10, 16(%0);"
" adcx %%rcx, %%r11;"
" movq %%r11, 24(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 0(%0);"
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
" xor %%rcx, %%rcx;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%r12;"
" adcx %%r13, %%r9;"
" adoxq 72(%1), %%r9;"
" mulxq 112(%1), %%r10, %%r13;"
" adcx %%r12, %%r10;"
" adoxq 80(%1), %%r10;"
" mulxq 120(%1), %%r11, %%rax;"
" adcx %%r13, %%r11;"
" adoxq 88(%1), %%r11;"
" adcx %%rcx, %%rax;"
" adox %%rcx, %%rax;"
" imul %%rdx, %%rax;"
/* Step 2: Fold the carry back into dst */
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 40(%0);"
" adcx %%rcx, %%r10;"
" movq %%r10, 48(%0);"
" adcx %%rcx, %%r11;"
" movq %%r11, 56(%0);"
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
" mov $0, %%rax;"
" cmovc %%rdx, %%rax;"
" add %%rax, %%r8;"
" movq %%r8, 32(%0);"
: "+&r" (tmp), "+&r" (f), "+&r" (out)
:
: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
);
}
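/* Returns all-ones if a == b, else 0, in constant time: for x = a ^ b,
 * x | -x has its top bit set exactly when x != 0, so shifting that bit
 * down and subtracting 1 yields the mask without branching on secrets. */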
static __always_inline uint64_t eq_mask(uint64_t a, uint64_t b)
{
uint64_t x = a ^ b;
uint64_t minus_x = ~x + (uint64_t)1U;
uint64_t x_or_minus_x = x | minus_x;
uint64_t xnx = x_or_minus_x >> (uint32_t)63U;
return xnx - (uint64_t)1U;
}
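/* Returns all-ones if a >= b, else 0, in constant time, by recovering the
 * borrow of a - b from the operands' top bits without branching. */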
static __always_inline uint64_t gte_mask(uint64_t a, uint64_t b)
{
uint64_t x = a;
uint64_t y = b;
uint64_t x_xor_y = x ^ y;
uint64_t x_sub_y = x - y;
uint64_t x_sub_y_xor_y = x_sub_y ^ y;
uint64_t q = x_xor_y | x_sub_y_xor_y;
uint64_t x_xor_q = x ^ q;
uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U;
return x_xor_q_ - (uint64_t)1U;
}
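/* Final reduction into canonical form: fold the top bit in twice using
 * 2^255 == 19 (mod p), then conditionally subtract p = 2^255 - 19
 * (limbs 0xffffffffffffffed, 0xffffffffffffffff, 0xffffffffffffffff,
 * 0x7fffffffffffffff) with the constant-time masks above. */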
static void store_felem(uint64_t *b, uint64_t *f)
{
	uint64_t f3 = f[3U];
	uint64_t top_bit = f3 >> (uint32_t)63U;
	uint64_t f0, f1, f2, m0, m1, m2, m3, mask;
	f[3U] = f3 & (uint64_t)0x7fffffffffffffffU;
	add_scalar(f, f, (uint64_t)19U * top_bit);
	f3 = f[3U];
	top_bit = f3 >> (uint32_t)63U;
	f[3U] = f3 & (uint64_t)0x7fffffffffffffffU;
	add_scalar(f, f, (uint64_t)19U * top_bit);
	f0 = f[0U];
	f1 = f[1U];
	f2 = f[2U];
	f3 = f[3U];
	m0 = gte_mask(f0, (uint64_t)0xffffffffffffffedU);
	m1 = eq_mask(f1, (uint64_t)0xffffffffffffffffU);
	m2 = eq_mask(f2, (uint64_t)0xffffffffffffffffU);
	m3 = eq_mask(f3, (uint64_t)0x7fffffffffffffffU);
	mask = ((m0 & m1) & m2) & m3;
	b[0U] = f0 - (mask & (uint64_t)0xffffffffffffffedU);
	b[1U] = f1 - (mask & (uint64_t)0xffffffffffffffffU);
	b[2U] = f2 - (mask & (uint64_t)0xffffffffffffffffU);
	b[3U] = f3 - (mask & (uint64_t)0x7fffffffffffffffU);
}
enum { QWORDS_PER_FIELD_ELEMENT = 4 };
uint64_t out_fadd[QWORDS_PER_FIELD_ELEMENT];
uint64_t f1_fadd[QWORDS_PER_FIELD_ELEMENT];
uint64_t f2_fadd[QWORDS_PER_FIELD_ELEMENT];
uint64_t out_fsub[QWORDS_PER_FIELD_ELEMENT];
uint64_t f1_fsub[QWORDS_PER_FIELD_ELEMENT];
uint64_t f2_fsub[QWORDS_PER_FIELD_ELEMENT];
uint64_t out_fmul[QWORDS_PER_FIELD_ELEMENT];
uint64_t f1_fmul[QWORDS_PER_FIELD_ELEMENT];
uint64_t f2_fmul[QWORDS_PER_FIELD_ELEMENT];
uint64_t out_fmul2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t f1_fmul2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t f2_fmul2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t out_fmul_scalar[QWORDS_PER_FIELD_ELEMENT];
uint64_t f1_fmul_scalar[QWORDS_PER_FIELD_ELEMENT];
uint64_t f2_fmul_scalar;
uint64_t out_fsqr[QWORDS_PER_FIELD_ELEMENT];
uint64_t f1_fsqr[QWORDS_PER_FIELD_ELEMENT];
uint64_t out_fsqr2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t f1_fsqr2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t bit_cswap2;
uint64_t p1_cswap2[QWORDS_PER_FIELD_ELEMENT * 2];
uint64_t p2_cswap2[QWORDS_PER_FIELD_ELEMENT * 2];
int main(int argc, char *argv[])
{
uint64_t tmp[QWORDS_PER_FIELD_ELEMENT * 4];
fadd(out_fadd, f1_fadd, f2_fadd);
store_felem(out_fadd, out_fadd);
fsub(out_fsub, f1_fsub, f2_fsub);
store_felem(out_fsub, out_fsub);
fmul(out_fmul, f1_fmul, f2_fmul, tmp);
store_felem(out_fmul, out_fmul);
fmul2(out_fmul2, f1_fmul2, f2_fmul2, tmp);
store_felem(out_fmul2, out_fmul2);
store_felem(out_fmul2 + QWORDS_PER_FIELD_ELEMENT, out_fmul2 + QWORDS_PER_FIELD_ELEMENT);
fmul_scalar(out_fmul_scalar, f1_fmul_scalar, f2_fmul_scalar);
store_felem(out_fmul_scalar, out_fmul_scalar);
fsqr(out_fsqr, f1_fsqr, tmp);
store_felem(out_fsqr, out_fsqr);
fsqr2(out_fsqr2, f1_fsqr2, tmp);
store_felem(out_fsqr2, out_fsqr2);
store_felem(out_fsqr2 + QWORDS_PER_FIELD_ELEMENT, out_fsqr2 + QWORDS_PER_FIELD_ELEMENT);
cswap2(bit_cswap2, p1_cswap2, p2_cswap2);
return 0;
}
import idaapi
import idc
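# IDP hook that recognizes the x86 "undefined" opcodes 0f ff (ud0),
# 0f b9 (ud1), and 0f 0b (ud2), marking each as a two-byte instruction
# whose mnemonic text is set manually.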
class linux_und(idaapi.IDP_Hooks):
def __init__(self):
idaapi.IDP_Hooks.__init__(self)
self.n = idaapi.netnode("$ X86 Linux Undefined Instructions", 0, 1)
def ev_ana_insn(self, ins):
if idaapi.get_byte(ins.ea) != 0x0f:
return False
next_byte = idaapi.get_byte(ins.ea + 1)
if next_byte == 0xff:
name = "ud0"
elif next_byte == 0xb9:
name = "ud1"
elif next_byte == 0x0b:
name = "ud2"
else:
return False
ins.itype = idaapi.CUSTOM_CMD_ITYPE + next_byte
ins.size = 2
idaapi.set_manual_insn(ins.ea, name)
return True
class linux_und_t(idaapi.plugin_t):
flags = idaapi.PLUGIN_PROC | idaapi.PLUGIN_HIDE
comment = "Instruction Decoder"
wanted_hotkey = ""
help = "Runs transparently"
wanted_name = "linux_und"
hook = None
def init(self):
self.hook = None
if idaapi.ph_get_id() != idaapi.PLFM_386:
return idaapi.PLUGIN_SKIP
self.hook = linux_und()
self.hook.hook()
return idaapi.PLUGIN_KEEP
def run(self, arg):
pass
def term(self):
if self.hook:
self.hook.unhook()
def PLUGIN_ENTRY():
return linux_und_t()
#pragma once

#include <string>
class AudioFile
{
public:
AudioFile(const std::string &filename);
inline bool isValid() const { return m_isValid; }
inline std::string filename() const { return m_filename; }
inline std::string artist() const { return m_artist; }
inline std::string composer() const { return m_composer; }
inline std::string album() const { return m_album; }
inline std::string albumArtist() const { return m_albumArtist; }
inline std::string title() const { return m_title; }
inline std::string genre() const { return m_genre; }
inline std::string comment() const { return m_comment; }
inline unsigned int track() const { return m_track; }
inline unsigned int disc() const { return m_disc; }
inline unsigned int discTotal() const { return m_discTotal; }
inline unsigned int bpm() const { return m_bpm; }
inline unsigned int year() const { return m_year; }
inline unsigned int length() const { return m_length; }
inline unsigned int bitrate() const { return m_bitrate; }
inline unsigned int sampleRate() const { return m_sampleRate; }
inline unsigned int channels() const { return m_channels; }
inline bool compilation() const { return m_compilation; }
bool operator<(const AudioFile &other) const;
private:
bool m_isValid;
std::string m_filename;
std::string m_artist;
std::string m_composer;
std::string m_album;
std::string m_albumArtist;
std::string m_title;
std::string m_genre;
std::string m_comment;
unsigned int m_track;
unsigned int m_disc;
unsigned int m_discTotal;
unsigned int m_bpm;
unsigned int m_year;
unsigned int m_length;
unsigned int m_bitrate;
unsigned int m_sampleRate;
unsigned int m_channels;
bool m_compilation;
};
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
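/* ptree is assumed to be a custom syscall that is not in mainline Linux;
 * 313 is presumably the number it was assigned in the patched kernel. */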
#define __NR_ptree 313
struct prinfo {
long state;
pid_t pid;
pid_t parent_pid;
pid_t first_child_pid;
pid_t next_sibling_pid;
long uid;
char comm[64];
};
static inline int ptree(struct prinfo *processes, unsigned int *count)
{
return syscall(__NR_ptree, processes, count);
}
static pid_t find_parent(struct prinfo *processes, unsigned int count, pid_t pid)
{
unsigned int i;
for (i = 0; i < count; ++i) {
if (processes[i].pid == pid)
return processes[i].parent_pid;
}
return 0;
}
static int hilariously_inefficient_method_of_finding_indentation_level(struct prinfo *processes, unsigned int count, pid_t pid)
{
int indentation = 0;
while((pid = find_parent(processes, count, pid)))
++indentation;
return indentation;
/* Bonus points if you can find a reasonable _looking_
* algorithm that is even more inefficient. */
}
int main(int argc, char *argv[])
{
struct prinfo *processes;
unsigned int count, indentation, i, j;
pid_t last_ppid;
indentation = 0;
count = 32768;
processes = malloc(sizeof(struct prinfo) * count);
if (!processes) {
perror("processes");
return EXIT_FAILURE;
}
memset(processes, 0, sizeof(struct prinfo) * count);
if (ptree(processes, &count)) {
perror("ptree");
return EXIT_FAILURE;
}
for (i = 0; i < count; ++i) {
indentation = hilariously_inefficient_method_of_finding_indentation_level(processes, count, processes[i].pid);
for (j = 0; j < indentation; ++j)
putchar('\t');
printf("%s,%d,%ld,%d,%d,%d,%ld\n", processes[i].comm, processes[i].pid,
processes[i].state, processes[i].parent_pid, processes[i].first_child_pid,
processes[i].next_sibling_pid, processes[i].uid);
}
printf("%d processes\n", count);
return EXIT_SUCCESS;
}