diff -Naur vdr-1.5.8.org/channels.c vdr-1.5.8/channels.c --- vdr-1.5.8.org/channels.c 2009-01-10 10:11:45.320620629 -0500 +++ vdr-1.5.8/channels.c 2009-01-10 10:57:03.692745967 -0500 @@ -13,6 +13,7 @@ #include "device.h" #include "epg.h" #include "timers.h" +#include "memcpy.h" // IMPORTANT NOTE: in the 'sscanf()' calls there is a blank after the '%d' // format characters in order to allow any number of blanks after a numeric @@ -231,7 +232,7 @@ shortName = strcpyrealloc(shortName, Channel.shortName); provider = strcpyrealloc(provider, Channel.provider); portalName = strcpyrealloc(portalName, Channel.portalName); - memcpy(&__BeginData__, &Channel.__BeginData__, (char *)&Channel.__EndData__ - (char *)&Channel.__BeginData__); + vdr_fast_memcpy(&__BeginData__, &Channel.__BeginData__, (char *)&Channel.__EndData__ - (char *)&Channel.__BeginData__); return *this; } diff -Naur vdr-1.5.8.org/ci.c vdr-1.5.8/ci.c --- vdr-1.5.8.org/ci.c 2009-01-10 10:11:45.321620810 -0500 +++ vdr-1.5.8/ci.c 2009-01-10 10:57:03.693747229 -0500 @@ -20,6 +20,7 @@ #include "device.h" #include "pat.h" #include "tools.h" +#include "memcpy.h" // Set these to 'true' for debug output: static bool DumpTPDUDataTransfer = false; @@ -176,7 +177,7 @@ p = SetLength(p, Length + 1); *p++ = Tcid; if (Length) - memcpy(p, Data, Length); + vdr_fast_memcpy(p, Data, Length); size = Length + (p - buffer); } else @@ -403,7 +404,7 @@ *p++ = Tag & 0xFF; p = SetLength(p, Length); if (p - buffer + Length < int(sizeof(buffer))) { - memcpy(p, Data, Length); + vdr_fast_memcpy(p, Data, Length); p += Length; tc->SendData(p - buffer, buffer); } @@ -627,7 +628,7 @@ if (esInfoLengthPos) { if (length + Length < int(sizeof(capmt))) { capmt[length++] = cmdId; - memcpy(capmt + length, Data, Length); + vdr_fast_memcpy(capmt + length, Data, Length); length += Length; int l = length - esInfoLengthPos - 2; capmt[esInfoLengthPos] = (l >> 8) & 0xFF; diff -Naur vdr-1.5.8.org/config.c vdr-1.5.8/config.c --- vdr-1.5.8.org/config.c 2009-01-10 
10:24:39.942620665 -0500 +++ vdr-1.5.8/config.c 2009-01-10 10:57:03.693747229 -0500 @@ -15,6 +15,7 @@ #include "interface.h" #include "plugin.h" #include "recording.h" +#include "memcpy.h" // IMPORTANT NOTE: in the 'sscanf()' calls there is a blank after the '%d' // format characters in order to allow any number of blanks after a numeric @@ -303,7 +304,7 @@ cSetup& cSetup::operator= (const cSetup &s) { - memcpy(&__BeginData__, &s.__BeginData__, (char *)&s.__EndData__ - (char *)&s.__BeginData__); + vdr_fast_memcpy(&__BeginData__, &s.__BeginData__, (char *)&s.__EndData__ - (char *)&s.__BeginData__); return *this; } diff -Naur vdr-1.5.8.org/device.c vdr-1.5.8/device.c --- vdr-1.5.8.org/device.c 2009-01-10 10:24:04.387995634 -0500 +++ vdr-1.5.8/device.c 2009-01-10 10:57:03.694622543 -0500 @@ -20,6 +20,7 @@ #include "transfer.h" #include "cutter.h" #include "videodir.h" +#include "memcpy.h" // --- cPesAssembler --------------------------------------------------------- @@ -100,7 +101,7 @@ Length--; } if (Length && Realloc(length + Length)) { - memcpy(data + length, Data, Length); + vdr_fast_memcpy(data + length, Data, Length); length += Length; } } diff -Naur vdr-1.5.8.org/dvbdevice.c vdr-1.5.8/dvbdevice.c --- vdr-1.5.8.org/dvbdevice.c 2009-01-10 10:11:45.322620948 -0500 +++ vdr-1.5.8/dvbdevice.c 2009-01-10 10:57:03.694622543 -0500 @@ -26,6 +26,7 @@ #include "receiver.h" #include "status.h" #include "transfer.h" +#include "memcpy.h" #define DO_REC_AND_PLAY_ON_PRIMARY_DEVICE 1 #define DO_MULTIPLE_RECORDINGS 1 @@ -202,7 +203,7 @@ uchar *codes = diseqc->Codes(n); if (codes) { struct dvb_diseqc_master_cmd cmd; - memcpy(cmd.msg, codes, min(n, int(sizeof(cmd.msg)))); + vdr_fast_memcpy(cmd.msg, codes, min(n, int(sizeof(cmd.msg)))); cmd.msg_len = n; CHECK(ioctl(fd_frontend, FE_DISEQC_SEND_MASTER_CMD, &cmd)); } @@ -607,8 +608,8 @@ Size = l + bytes; result = MALLOC(uchar, Size); if (result) { - memcpy(result, buf, l); - memcpy(result + l, mem, bytes); + vdr_fast_memcpy(result, 
buf, l); + vdr_fast_memcpy(result + l, mem, bytes); } else esyslog("ERROR: failed to convert image to PNM"); @@ -1181,7 +1182,7 @@ } if (blen + len > Length) // invalid PES length field break; - memcpy(&buf[blen], &Data[offs], len); + vdr_fast_memcpy(&buf[blen], &Data[offs], len); i = offs + len; blen += len; } diff -Naur vdr-1.5.8.org/dvbspu.c vdr-1.5.8/dvbspu.c --- vdr-1.5.8.org/dvbspu.c 2009-01-10 10:11:45.323621028 -0500 +++ vdr-1.5.8/dvbspu.c 2009-01-10 10:57:03.694622543 -0500 @@ -17,6 +17,7 @@ #include #include #include "device.h" +#include "memcpy.h" /* * cDvbSpubitmap: @@ -289,7 +290,7 @@ hlpsize.y1 = sy; hlpsize.x2 = ex; hlpsize.y2 = ey; - memcpy(hlpDescr, pld, sizeof(aDvbSpuPalDescr)); + vdr_fast_memcpy(hlpDescr, pld, sizeof(aDvbSpuPalDescr)); highlight = true; clean = false; } diff -Naur vdr-1.5.8.org/font.c vdr-1.5.8/font.c --- vdr-1.5.8.org/font.c 2009-01-10 10:11:45.323621028 -0500 +++ vdr-1.5.8/font.c 2009-01-10 10:57:03.695754214 -0500 @@ -15,6 +15,7 @@ #include "config.h" #include "osd.h" #include "tools.h" +#include "memcpy.h" const char *DefaultFontOsd = "Sans Serif:Bold"; const char *DefaultFontSml = "Sans Serif"; @@ -69,7 +70,7 @@ rows = GlyphData->bitmap.rows; pitch = GlyphData->bitmap.pitch; bitmap = MALLOC(uchar, rows * pitch); - memcpy(bitmap, GlyphData->bitmap.buffer, rows * pitch); + vdr_fast_memcpy(bitmap, GlyphData->bitmap.buffer, rows * pitch); } cGlyph::~cGlyph() diff -Naur vdr-1.5.8.org/h264parser.c vdr-1.5.8/h264parser.c --- vdr-1.5.8.org/h264parser.c 2009-01-10 10:11:45.323621028 -0500 +++ vdr-1.5.8/h264parser.c 2009-01-10 10:57:03.695754214 -0500 @@ -10,6 +10,7 @@ #include "tools.h" #include "h264parser.h" +#include "memcpy.h" namespace H264 { @@ -88,7 +89,7 @@ } if (avail + Count > size) Count = size - avail; - memcpy(data + avail, Data, Count); + vdr_fast_memcpy(data + avail, Data, Count); avail += Count; return Count; } diff -Naur vdr-1.5.8.org/Makefile vdr-1.5.8/Makefile --- vdr-1.5.8.org/Makefile 2009-01-10 
10:11:45.323621028 -0500 +++ vdr-1.5.8/Makefile 2009-01-10 10:57:03.695754214 -0500 @@ -41,7 +41,7 @@ lirc.o menu.o menuitems.o nit.o osdbase.o osd.o pat.o player.o plugin.o rcu.o\ receiver.o recorder.o recording.o remote.o remux.o ringbuffer.o sdt.o sections.o shutdown.o\ skinclassic.o skins.o skinsttng.o sources.o spu.o status.o svdrp.o themes.o thread.o\ - timers.o tools.o transfer.o vdr.o videodir.o h264parser.o + timers.o tools.o transfer.o vdr.o videodir.o h264parser.o memcpy.o # SUBMENU + TinyXML OBJS += tinystr.o tinyxml.o tinyxmlerror.o tinyxmlparser.o submenu.o diff -Naur vdr-1.5.8.org/memcpy.c vdr-1.5.8/memcpy.c --- vdr-1.5.8.org/memcpy.c 1969-12-31 19:00:00.000000000 -0500 +++ vdr-1.5.8/memcpy.c 2009-01-10 10:57:03.696747195 -0500 @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2001-2004 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * These are the MMX/MMX2/SSE optimized versions of memcpy + * + * This code was adapted from Linux Kernel sources by Nick Kurshev to + * the mplayer program. (http://mplayer.sourceforge.net) + * + * Miguel Freitas split the #ifdefs into several specialized functions that + * are benchmarked at runtime by xine. Some original comments from Nick + * have been preserved documenting some MMX/SSE oddities. 
+ * Also added kernel memcpy function that seems faster than the libc one. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if defined (ARCH_PPC) && !defined (HOST_OS_DARWIN) +#include "ppcasm_string.h" +#endif + +#ifdef HAVE_SYS_TIMES_H +#include +#endif + +#include +#include + +#define LOG_MODULE "memcpy" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include "memcpy.h" +#include "tools.h" + +void *(* xine_fast_memcpy)(void *to, const void *from, size_t len); + +/* Original comments from mplayer (file: aclib.c) + This part of the code was taken by me from Linux-2.4.3 and slightly modified +for the MMX, MMX2 and SSE instruction sets. I have done it since linux uses page aligned +blocks but mplayer uses weakly ordered data and the original sources cannot +speed them up. Only using PREFETCHNTA and MOVNTQ together has an effect! + +From IA-32 Intel Architecture Software Developer's Manual Volume 1, + +Order Number 245470: +"10.4.6. Cacheability Control, Prefetch, and Memory Ordering Instructions" + +Data referenced by a program can be temporal (data will be used again) or +non-temporal (data will be referenced once and not reused in the immediate +future). To make efficient use of the processor's caches, it is generally +desirable to cache temporal data and not cache non-temporal data. Overloading +the processor's caches with non-temporal data is sometimes referred to as +"polluting the caches". +The non-temporal data is written to memory with Write-Combining semantics. + +The PREFETCHh instructions permit a program to load data into the processor +at a suggested cache level, so that it is closer to the processor's load and +store unit when it is needed. If the data is already present in a level of +the cache hierarchy that is closer to the processor, the PREFETCHh instruction +will not result in any data movement. +But we should use PREFETCHNTA: it fetches non-temporal data into a location +close to the processor, minimizing cache pollution. 
+ +The MOVNTQ (store quadword using non-temporal hint) instruction stores +packed integer data from an MMX register to memory, using a non-temporal hint. +The MOVNTPS (store packed single-precision floating-point values using +non-temporal hint) instruction stores packed floating-point data from an +XMM register to memory, using a non-temporal hint. + +The SFENCE (Store Fence) instruction controls write ordering by creating a +fence for memory store operations. This instruction guarantees that the results +of every store instruction that precedes the store fence in program order are +globally visible before any store instruction that follows the fence. The +SFENCE instruction provides an efficient way of ensuring ordering between +procedures that produce weakly-ordered data and procedures that consume that +data. + +If you have questions please contact me: Nick Kurshev: nickols_k@mail.ru. +*/ + +/* mmx v.1 Note: Since we added alignment of the destination, it speeds up + memory copying on PentMMX, Celeron-1 and P2 by up to 12% versus the + standard (non MMX-optimized) version. + Note: on K6-2+ it speeds up memory copying by up to 25% and + on K7 and P3 about 500% (5 times). +*/ + +/* Additional notes on gcc assembly and processors: [MF] +prefetch is specific to AMD processors, the intel ones should be +prefetcht0, prefetcht1, prefetcht2 which are not recognized by my gcc. +prefetchnta is supported both on athlon and pentium 3. + +therefore i will take off prefetchnta instructions from the mmx1 version +to avoid problems on pentium mmx and k6-2. 
+
+quote of the day:
+"Using prefetches efficiently is more of an art than a science"
+*/
+
+
+/* for small memory blocks (<256 bytes) this version is faster; 'to' and 'from' are asm output operands, so the macro leaves both arguments advanced past the copied bytes */
+#define small_memcpy(to,from,n)\
+{\
+unsigned long int dummy;\
+__asm__ __volatile__(\
+  "rep; movsb"\
+  :"=&D"(to), "=&S"(from), "=&c"(dummy)\
+  :"0" (to), "1" (from),"2" (n)\
+  : "memory");\
+}
+
+/* linux kernel __memcpy (from: /include/asm/string.h); copies n bytes from 'from' to 'to' and returns the original 'to' */
+static __inline__ void * linux_kernel_memcpy_impl (
+                               void * to,
+                               const void * from,
+                               size_t n)
+{
+int d0, d1, d2;
+void * const retval = to; /* small_memcpy() advances 'to', so remember the start of the destination for the return value */
+  if( n < 4 ) {
+    small_memcpy(to,from,n);
+  }
+  else
+    __asm__ __volatile__(
+    "rep ; movsl\n\t" /* copy n/4 double words */
+    "testb $2,%b4\n\t" /* bit 1 of n set: one more 16-bit word */
+    "je 1f\n\t"
+    "movsw\n"
+    "1:\ttestb $1,%b4\n\t" /* bit 0 of n set: one more byte */
+    "je 2f\n\t"
+    "movsb\n"
+    "2:"
+    : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+    :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+    : "memory");
+
+  return retval;
+}
+
+#define SSE_MMREG_SIZE 16
+#define MMX_MMREG_SIZE 8
+
+#define MMX1_MIN_LEN 0x800 /* 2K blocks */
+#define MIN_LEN 0x40 /* 64-byte blocks */
+
+/* SSE note: i tried to move 128 bytes a time instead of 64 but it
+didn't make any measureable difference. i'm using 64 for the sake of
+simplicity.
[MF] */ /* sse_memcpy(): copies len bytes from 'from' to 'to' through XMM registers with non-temporal (movntps) stores; blocks shorter than MIN_LEN and the tail go through linux_kernel_memcpy_impl(); returns the original 'to' */
+static __inline__ void * sse_memcpy(void * to, const void * from, size_t len)
+{
+  void *retval;
+  size_t i;
+  retval = to;
+
+  /* PREFETCH has effect even for MOVSB instruction ;) */
+  __asm__ __volatile__ (
+    " prefetchnta (%0)\n"
+    " prefetchnta 32(%0)\n"
+    " prefetchnta 64(%0)\n"
+    " prefetchnta 96(%0)\n"
+    " prefetchnta 128(%0)\n"
+    " prefetchnta 160(%0)\n"
+    " prefetchnta 192(%0)\n"
+    " prefetchnta 224(%0)\n"
+    " prefetchnta 256(%0)\n"
+    " prefetchnta 288(%0)\n"
+    : : "r" (from) );
+
+  if(len >= MIN_LEN)
+  {
+    unsigned long int delta;
+    /* Align destination to MMREG_SIZE -boundary */
+    delta = ((unsigned long int)to)&(SSE_MMREG_SIZE-1);
+    if(delta)
+    {
+      delta=SSE_MMREG_SIZE-delta;
+      len -= delta;
+      small_memcpy(to, from, delta); /* also advances 'to' and 'from' by delta */
+    }
+    i = len >> 6; /* len/64 */
+    len&=63;
+    if(((unsigned long)from) & 15)
+      /* if SRC is misaligned */
+      for(; i>0; i--)
+      {
+        __asm__ __volatile__ (
+        "prefetchnta 320(%0)\n"
+        "prefetchnta 352(%0)\n"
+        "movups (%0), %%xmm0\n"
+        "movups 16(%0), %%xmm1\n"
+        "movups 32(%0), %%xmm2\n"
+        "movups 48(%0), %%xmm3\n"
+        "movntps %%xmm0, (%1)\n"
+        "movntps %%xmm1, 16(%1)\n"
+        "movntps %%xmm2, 32(%1)\n"
+        "movntps %%xmm3, 48(%1)\n"
+        :: "r" (from), "r" (to) : "memory"); /* NOTE(review): xmm0-xmm3 are written but not listed as clobbers */
+        from = ((const unsigned char *)from) + 64;
+        to = ((unsigned char *)to) + 64;
+      }
+    else
+      /*
+         Only if SRC is aligned on 16-byte boundary.
+         It allows to use movaps instead of movups, which required data
+         to be aligned or a general-protection exception (#GP) is generated.
+      */
+      for(; i>0; i--)
+      {
+        __asm__ __volatile__ (
+        "prefetchnta 320(%0)\n"
+        "prefetchnta 352(%0)\n"
+        "movaps (%0), %%xmm0\n"
+        "movaps 16(%0), %%xmm1\n"
+        "movaps 32(%0), %%xmm2\n"
+        "movaps 48(%0), %%xmm3\n"
+        "movntps %%xmm0, (%1)\n"
+        "movntps %%xmm1, 16(%1)\n"
+        "movntps %%xmm2, 32(%1)\n"
+        "movntps %%xmm3, 48(%1)\n"
+        :: "r" (from), "r" (to) : "memory"); /* NOTE(review): xmm0-xmm3 are written but not listed as clobbers */
+        from = ((const unsigned char *)from) + 64;
+        to = ((unsigned char *)to) + 64;
+      }
+    /* since movntq is weakly-ordered, a "sfence"
+     * is needed to become ordered again. */
+    __asm__ __volatile__ ("sfence":::"memory");
+  }
+  /*
+   * Now do the tail of the block
+   */
+  if(len) linux_kernel_memcpy_impl(to, from, len);
+  return retval;
+}
+/* mmx_memcpy(): plain MMX variant (movq loads and stores, no prefetch), used for blocks of at least MMX1_MIN_LEN bytes; returns the original 'to'. Not called anywhere in this file. */
+static __inline__ void * mmx_memcpy(void * to, const void * from, size_t len)
+{
+  void *retval;
+  size_t i;
+  retval = to;
+
+  if(len >= MMX1_MIN_LEN)
+  {
+    unsigned long int delta;
+    /* Align destination to MMREG_SIZE -boundary */
+    delta = ((unsigned long int)to)&(MMX_MMREG_SIZE-1);
+    if(delta)
+    {
+      delta=MMX_MMREG_SIZE-delta;
+      len -= delta;
+      small_memcpy(to, from, delta); /* also advances 'to' and 'from' by delta */
+    }
+    i = len >> 6; /* len/64 */
+    len&=63;
+    for(; i>0; i--)
+    {
+      __asm__ __volatile__ (
+      "movq (%0), %%mm0\n"
+      "movq 8(%0), %%mm1\n"
+      "movq 16(%0), %%mm2\n"
+      "movq 24(%0), %%mm3\n"
+      "movq 32(%0), %%mm4\n"
+      "movq 40(%0), %%mm5\n"
+      "movq 48(%0), %%mm6\n"
+      "movq 56(%0), %%mm7\n"
+      "movq %%mm0, (%1)\n"
+      "movq %%mm1, 8(%1)\n"
+      "movq %%mm2, 16(%1)\n"
+      "movq %%mm3, 24(%1)\n"
+      "movq %%mm4, 32(%1)\n"
+      "movq %%mm5, 40(%1)\n"
+      "movq %%mm6, 48(%1)\n"
+      "movq %%mm7, 56(%1)\n"
+      :: "r" (from), "r" (to) : "memory");
+      from = ((const unsigned char *)from) + 64;
+      to = ((unsigned char *)to) + 64;
+    }
+    __asm__ __volatile__ ("emms":::"memory"); /* leave MMX state so the FPU is usable again */
+  }
+  /*
+   * Now do the tail of the block
+   */
+  if(len) linux_kernel_memcpy_impl(to, from, len);
+  return retval;
+}
+/* mmx2_memcpy(): MMX variant with prefetchnta and non-temporal movntq stores, used for blocks of at least MIN_LEN bytes; returns the original 'to'. Not called anywhere in this file. */
+static __inline__ void * mmx2_memcpy(void * to, const void * from, size_t len)
+{
+  void *retval;
+  size_t i;
+  retval = to;
+
+  /* PREFETCH has effect even for MOVSB instruction ;) */
+  __asm__ __volatile__ (
+    " prefetchnta (%0)\n"
+    " prefetchnta 32(%0)\n"
+    " prefetchnta 64(%0)\n"
+    " prefetchnta 96(%0)\n"
+    " prefetchnta 128(%0)\n"
+    " prefetchnta 160(%0)\n"
+    " prefetchnta 192(%0)\n"
+    " prefetchnta 224(%0)\n"
+    " prefetchnta 256(%0)\n"
+    " prefetchnta 288(%0)\n"
+    : : "r" (from) );
+
+  if(len >= MIN_LEN)
+  {
+    unsigned long int delta;
+    /* Align destination to MMREG_SIZE -boundary */
+    delta = ((unsigned long int)to)&(MMX_MMREG_SIZE-1);
+    if(delta)
+    {
+      delta=MMX_MMREG_SIZE-delta;
+      len -= delta;
+      small_memcpy(to, from, delta); /* also advances 'to' and 'from' by delta */
+    }
+    i = len >> 6; /* len/64 */
+    len&=63;
+    for(; i>0; i--)
+    {
+      __asm__ __volatile__ (
+      "prefetchnta 320(%0)\n"
+      "prefetchnta 352(%0)\n"
+      "movq (%0), %%mm0\n"
+      "movq 8(%0), %%mm1\n"
+      "movq 16(%0), %%mm2\n"
+      "movq 24(%0), %%mm3\n"
+      "movq 32(%0), %%mm4\n"
+      "movq 40(%0), %%mm5\n"
+      "movq 48(%0), %%mm6\n"
+      "movq 56(%0), %%mm7\n"
+      "movntq %%mm0, (%1)\n"
+      "movntq %%mm1, 8(%1)\n"
+      "movntq %%mm2, 16(%1)\n"
+      "movntq %%mm3, 24(%1)\n"
+      "movntq %%mm4, 32(%1)\n"
+      "movntq %%mm5, 40(%1)\n"
+      "movntq %%mm6, 48(%1)\n"
+      "movntq %%mm7, 56(%1)\n"
+      :: "r" (from), "r" (to) : "memory");
+      from = ((const unsigned char *)from) + 64;
+      to = ((unsigned char *)to) + 64;
+    }
+    /* since movntq is weakly-ordered, a "sfence"
+     * is needed to become ordered again. */
+    __asm__ __volatile__ ("sfence":::"memory");
+    __asm__ __volatile__ ("emms":::"memory");
+  }
+  /*
+   * Now do the tail of the block
+   */
+  if(len) linux_kernel_memcpy_impl(to, from, len);
+  return retval;
+}
+/* vdr_fast_memcpy(): public entry point declared in memcpy.h; always forwards to sse_memcpy(), with no check that the CPU supports the SSE instructions used there */
+void * vdr_fast_memcpy (void * to, const void * from, size_t n)
+{
+  return sse_memcpy(to, from, n);
+}
+
diff -Naur vdr-1.5.8.org/memcpy.h vdr-1.5.8/memcpy.h
--- vdr-1.5.8.org/memcpy.h 1969-12-31 19:00:00.000000000 -0500
+++ vdr-1.5.8/memcpy.h 2009-01-10 10:57:03.696747195 -0500
@@ -0,0 +1,14 @@
+/*
+ * memcpy.h: fast memory copy
+ *
+ * See the main source file 'vdr.c' for copyright information and
+ * how to reach the author.
+ *
+ * $Id: memcpy.h 1.17 2008/05/23 10:54:51 kls Exp $
+ */
+
+#ifndef __MEMCPY_H
+#define __MEMCPY_H
+#include <stddef.h> // size_t, so that this header can be included on its own
+void * vdr_fast_memcpy (void * to, const void * from, size_t n); // copies n bytes from 'from' to 'to' and returns 'to'
+#endif // __MEMCPY_H
diff -Naur vdr-1.5.8.org/osd.c vdr-1.5.8/osd.c --- vdr-1.5.8.org/osd.c 2009-01-10 10:11:45.324628132 -0500 +++ vdr-1.5.8/osd.c 2009-01-10 10:57:03.696747195 -0500 @@ -16,6 +16,7 @@ #include #include "tools.h" #include "osdcontroller.h" +#include "memcpy.h" // --- cPalette -------------------------------------------------------------- @@ -522,7 +523,7 @@ tIndex *pSrc = pRowSrc; \ tIndex *pDst = pRowDst; \ if (CanUseMemCpy) \ - memcpy(pDst, pSrc, n); \ + vdr_fast_memcpy(pDst, pSrc, n); \ else { \ for (int xx = x1; xx <= x2; xx++) { \ if (TransferCondition) \ @@ -697,7 +698,7 @@ int n = sizeof(tIndex) * (x2 - x1 + 1); for (int y = y1 + 1; y <= y2; y++) { pRowDst += width; - memcpy(pRowDst, pRowSrc, n); + vdr_fast_memcpy(pRowDst, pRowSrc, n); } } diff -Naur vdr-1.5.8.org/pat.c vdr-1.5.8/pat.c --- vdr-1.5.8.org/pat.c 2009-01-10 10:11:45.325624702 -0500 +++ vdr-1.5.8/pat.c 2009-01-10 10:57:03.696747195 -0500 @@ -13,6 +13,7 @@ #include "libsi/section.h" #include "libsi/descriptor.h" #include "thread.h" +#include "memcpy.h" #define PMT_SCAN_TIMEOUT 10 // seconds @@ -47,7 +48,7 @@ data[4] = ((CaPid >> 8) & 0x1F) | 0xE0; data[5] = CaPid & 0xFF; if
(Length) - memcpy(&data[6], Data, Length); + vdr_fast_memcpy(&data[6], Data, Length); } cCaDescriptor::~cCaDescriptor() @@ -163,7 +164,7 @@ if (IsStream >= 0 && IsStream != d->Stream()) dsyslog("CAM: different stream flag in CA descriptors"); IsStream = d->Stream(); - memcpy(Data + length, d->Data(), d->Length()); + vdr_fast_memcpy(Data + length, d->Data(), d->Length()); length += d->Length(); } else diff -Naur vdr-1.5.8.org/remux.c vdr-1.5.8/remux.c --- vdr-1.5.8.org/remux.c 2009-01-10 10:21:15.660621218 -0500 +++ vdr-1.5.8/remux.c 2009-01-10 10:57:03.697871422 -0500 @@ -21,6 +21,7 @@ #include "tools.h" #include "recording.h" #include "h264parser.h" +#include "memcpy.h" ePesHeader AnalyzePesHeader(const uchar *Data, int Count, int &PesPayloadOffset, bool *ContinuationHeader) { @@ -732,7 +733,7 @@ // code that commences in this PES packet. A picture start code commences // in PES packet if the first byte of the picture start code is present // in the PES packet. - memcpy(pesHeader, pesHeaderBackup, pesHeaderBackupLen); + vdr_fast_memcpy(pesHeader, pesHeaderBackup, pesHeaderBackupLen); pesHeaderLen = pesHeaderBackupLen; pesHeaderBackupLen = 0; } @@ -902,7 +903,7 @@ if (!continuationHeader) { // backup PES header pesHeaderBackupLen = pesPayloadOffset; - memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); + vdr_fast_memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); } // skip PES header @@ -1015,14 +1016,14 @@ int bite = pesHeaderLen; pesHeaderLen = 0; if (bite > 0) { - memcpy(fragmentData + fragmentLen, pesHeader, bite); + vdr_fast_memcpy(fragmentData + fragmentLen, pesHeader, bite); fragmentLen += bite; } // append payload. It may contain part of a start code at it's end, // which will be removed when the next packet gets processed. 
bite = data - payload; if (bite > 0) { - memcpy(fragmentData + fragmentLen, payload, bite); + vdr_fast_memcpy(fragmentData + fragmentLen, payload, bite); fragmentLen += bite; } } @@ -1347,7 +1348,7 @@ if (!continuationHeader) { // backup PES header pesHeaderBackupLen = pesPayloadOffset; - memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); + vdr_fast_memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); } // skip PES header @@ -1389,7 +1390,7 @@ // it shall refer to the access unit commencing in the PES packet. An // audio access unit commences in a PES packet if the first byte of // the audio access unit is present in the PES packet. - memcpy(pesHeader, pesHeaderBackup, pesHeaderBackupLen); + vdr_fast_memcpy(pesHeader, pesHeaderBackup, pesHeaderBackupLen); pesHeaderLen = pesHeaderBackupLen; pesHeaderBackupLen = 0; } @@ -1540,14 +1541,14 @@ int bite = pesHeaderLen; pesHeaderLen = 0; if (bite > 0) { - memcpy(fragmentData + fragmentLen, pesHeader, bite); + vdr_fast_memcpy(fragmentData + fragmentLen, pesHeader, bite); fragmentLen += bite; } // append payload. It may contain part of an audio frame header at it's // end, which will be removed when the next packet gets processed. 
bite = data - payload; if (bite > 0) { - memcpy(fragmentData + fragmentLen, payload, bite); + vdr_fast_memcpy(fragmentData + fragmentLen, payload, bite); fragmentLen += bite; } } @@ -1764,7 +1765,7 @@ else { // copy the fragment into separate buffer for later processing Bite = Todo; - memcpy(fragmentData + fragmentLen, Data, Bite); + vdr_fast_memcpy(fragmentData + fragmentLen, Data, Bite); fragmentLen += Bite; fragmentTodo -= Bite; } @@ -1799,12 +1800,12 @@ else { fragmentTodo = packetLen; // copy the pesheader into separate buffer for later processing - memcpy(fragmentData + fragmentLen, pesHeader, Bite); + vdr_fast_memcpy(fragmentData + fragmentLen, pesHeader, Bite); fragmentLen += Bite; fragmentTodo -= Bite; // copy the fragment into separate buffer for later processing Bite = Todo; - memcpy(fragmentData + fragmentLen, Data, Bite); + vdr_fast_memcpy(fragmentData + fragmentLen, Data, Bite); fragmentLen += Bite; fragmentTodo -= Bite; } @@ -1825,7 +1826,7 @@ // backup PES header if (Data[6] != 0x80 || Data[7] != 0x00 || Data[8] != 0x00) { pesHeaderBackupLen = 6 + 3 + Data[8]; - memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); + vdr_fast_memcpy(pesHeaderBackup, Data, pesHeaderBackupLen); } // skip PES header @@ -1843,7 +1844,7 @@ if (pesHeaderBackupLen > 0) { pesHeaderLen = pesHeaderBackupLen; pesHeaderBackupLen = 0; - memcpy(pesHeader, pesHeaderBackup, pesHeaderLen); + vdr_fast_memcpy(pesHeader, pesHeaderBackup, pesHeaderLen); AppendSubStreamID(); } } @@ -2144,7 +2145,7 @@ void cTS2PES::write_ipack(const uint8_t *Data, int Count) { if (count < 6) { - memcpy(buf, headr, 3); + vdr_fast_memcpy(buf, headr, 3); count = 6; } @@ -2162,7 +2163,7 @@ } } - memcpy(buf + count, Data, bite); + vdr_fast_memcpy(buf + count, Data, bite); count += bite; if (repacker) { diff -Naur vdr-1.5.8.org/ringbuffer.c vdr-1.5.8/ringbuffer.c --- vdr-1.5.8.org/ringbuffer.c 2009-01-10 10:11:45.326624672 -0500 +++ vdr-1.5.8/ringbuffer.c 2009-01-10 10:57:03.698721168 -0500 @@ -14,6 +14,7 @@ 
#include #include #include "tools.h" +#include "memcpy.h" // --- cRingBuffer ----------------------------------------------------------- @@ -259,13 +260,13 @@ if (free < Count) Count = free; if (Count >= rest) { - memcpy(buffer + head, Data, rest); + vdr_fast_memcpy(buffer + head, Data, rest); if (Count - rest) - memcpy(buffer + margin, Data + rest, Count - rest); + vdr_fast_memcpy(buffer + margin, Data + rest, Count - rest); head = margin + Count - rest; } else { - memcpy(buffer + head, Data, Count); + vdr_fast_memcpy(buffer + head, Data, Count); head += Count; } } @@ -291,7 +292,7 @@ int rest = Size() - tail; if (rest < margin && Head < tail) { int t = margin - rest; - memcpy(buffer + t, buffer + tail, rest); + vdr_fast_memcpy(buffer + t, buffer + tail, rest); tail = t; rest = Head - tail; } @@ -341,7 +342,7 @@ else { data = MALLOC(uchar, count); if (data) - memcpy(data, Data, count); + vdr_fast_memcpy(data, Data, count); else esyslog("ERROR: can't allocate frame buffer (count=%d)", count); }