• what is Accelerate? → 다른 Accelerate 영상에서 많이 다루니 패스

  • Swift Overlay 추가

    • vDSP → 신호 처리용 함수뿐 아니라 일반적인 벡터와 행렬 연산, 타입 변환을 포함한다.

      // for-loop
      var result = [Float](repeating: 0, count: n)
      
      for i in 0..<n {
      	result[i] = (a[i] + b[i]) * (c[i] - d[i])
      }
      
      // 클래식 vDSP
      vDSP_vasbm(a, 1,
      						b, 1,
      						c, 1,
      						d, 1,
      						&result, 1,
      						vDSP_Length(result.count))
      
      // 모던 vDSP
      // 포인터 대신 타입 정보를 활용하기 때문에 명시적으로 카운트를 넘길 필요가 없다.
      vDSP.multiply(addition: (a,b),
      							subtraction: (c,d),
      							result: &result)
      
      // self-allocating API도 제공. 여러 번 돌릴 때의 성능을 위해서는 buffer를 직접 제공해주는 게 좋긴 하다.
      let result = vDSP.multiply(addition: (a,b), subtraction: (c,d))
      
    • vDSP 타입 변환

      // without accelerate
      let result = source.map {
      	return UInt16($0.rounded(.towardZero))
      }
      
      // with classic vDSP
      let result = Array<UInt16>(unsafeUninitializedCapacity: source.count) {
      	buffer, initializedCount in
      	
      	vDSP_vfixu16(source, 1,
      							buffer.baseAddress!, 1,
      							vDSP_Length(source.count))
      
      	initializedCount = source.count
      }
      
      // with modern vDSP
      
      let result = vDSP.floatingPointToInteger(source,
      																					integerType: UInt16.self,
      																					rounding: .towardZero)
      
    • DFT

      // classic vDSP
      let setup = vDSP_DFT_zop_CreateSetup(
      		nil,
      		vDSP_Length(n),
      		.FORWARD)!
      
      var outputReal = [Float](repeating: 0, count: n)
      var outputImag = [Float](repeating: 0, count: n)
      
      vDSP_DFT_Execute(setup,
      								inputReal, inputImag,
      								&outputReal, &outputImag)
      
      vDSP_DFT_DestroySetup(setup)
      
      // modern vDSP
      
      let fwdDFT = vDSP.DFT(
      			count: n,
      			direction: .forward,
      			transformType: .complexComplex,
      			ofType: Float.self)!
      
      var outputReal = [Float](repeating: 0, count: n)
      var outputImag = [Float](repeating: 0, count: n)
      
      fwdDFT.transform(inputReal: inputReal,
      								inputImaginary: inputImag,
      								outputReal: &outputReal,
      								outputImaginary: &outputImag)
      
      // self-allocating 버전
      let returnedResult = fwdDFT.transform(inputReal: inputReal,
      																	inputImaginary: inputImag)
      
    • Biquadratic filtering

      let sections = vDSP_Length(1)
      
      let b0 = 0.0001
      let b1 = 0.001
      let b2 = 0.0005
      let a1 = -1.9795
      let a2 = 0.98
      
      let channelCount = vDSP_Length(2)
      
      var output = [Float](repeating: -1,
      										count: n)
      let setup = vDSP_biquadm_CreateSetup([b0, b1, b2, a1, a2,
      																			b0, b1, b2, a1, a2],
      																			vDSP_Length(sections),
      																			vDSP_Length(channelCount))!
      
      signal.withUnsafeBufferPointer { inputBuffer in
      	output.withUnsafeMutableBufferPointer { outputBuffer in
      		let length = vDSP_Length(n) / channelCount
      
      		var inputs: [UnsafePointer<Float>] = (0 ..< channelCount).map { i in
      			return inputBuffer.baseAddress!.advanced(by: Int(i*length))
      		}
      
      		var outputs: [UnsafeMutablePointer<Float>] = (0 ..< channelCount).map { i in
      				return outputBuffer.baseAddress!.advanced(by: Int(i*length))
      		}
      
      		vDSP_biquadm(setup, &inputs, 1, &outputs, 1,
      										vDSP_Length(n) / channelCount)
      	}
      }
      
      // modern API
      var biquad = vDSP.Biquad(coefficients: [b0, b1, b2, a1, a2,
      																			b0, b1, b2, a1, a2],
      												channelCount: channelCount,
      												sectionCount: sections,
      												ofType: Float.self)!
      
    • vForce - vDSP에는 없는 좀 더 복잡한 연산을 지원

      • 제곱근

        let a: [Float] = ...
        
        let result = a.map {
        		sqrt($0)
        }
        
        // classic vForce
        
        var result = [Float](repeating: 0, count: count)
        
        var n = Int32(result.count)
        
        vvsqrtf(&result,
        				a,
        				&n)
        // modern
        vForce.sqrt(a, result: &result)
        
        let result = vForce.sqrt(a)
        
      • 구적법(Quadrature)

        //  classic 
        var integrateFunction: quadrature_integrate_function = {
        	return quadrature_integrate_function(
        	fun: { (arg: UnsafeMutableRawPointer?, n: Int,
        					x: UnsafePointer<Double>, y: UnsafeMutablePointer<Double>) in
        
        				guard let radius = arg?.load(as: Double.self) else { return }
        
        				(0..<n).forEach { i in
        					y[i] = sqrt(radius * radius - x[i] * x[i])
        				}
        	},
        		fun_arg: &radius)
        }()
        
        var options = quadrature_integrate_options(integrator: QUADRATURE_INTEGRATE_QNG,
        																						abs_tolerance: 1.0e-8,
        																						rel_tolerance: 1.0e-2,
        																						qag_points_per_interval: 0,
        																						max_intervals: 0)
        
        var status = QUADRATURE_SUCCESS
        var estimatedAbsoluteError: Double = 0
        
        let result = quadrature_integrate(&integrateFunction,
        																	-radius,
        																	radius,
        																	&options,
        																	&status,
        																	&estimatedAbsoluteError,
        																	0,
        																	nil)
        
        // modern
        let quadrature = Quadrature(integrator: .nonAdaptive,
        														absoluteTolerance: 1.0e-8,
        														relativeTolerance: 1.0e-2)
        
        let result = quadrature.integrate(over: -radius...radius) { x in
        		return sqrt(radius * radius - x * x)
        }
        
        let quadrature = Quadrature(integrator: .adaptive(pointsPerInterval: .fifteen,
        																									maxIntervals: 7),
        																				absoluteTolerance: 1.0e-8,
        																				relativeTolerance: 1.0e-2)
        
    • vImage

      • 주요 변경점

        • flag들이 이제 optionSet이다.
        • swift error를 던진다.
        • 픽셀 포맷과 버퍼 타입에 맞는 enumeration
        • Unmanaged 타입과 mutable buffer를 인터페이스에서 숨김
        • free function에서 buffer와 format의 프로퍼티로 옮김
      • 버퍼 초기화

        // classic
        var format = vImage_CGImageFormat(
        	bitsPerComponent: 8,
        	bitsPerPixel: 32,
        	colorSpace: nil,
        	bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.first.rawValue),
        	version: 0,
        	decode: nil,
        	renderingIntent: .defaultIntent
        )
        
        var sourceBuffer = vImage_Buffer()
        
        var error = kvImageNoError
        error = vImageBuffer_InitWithCGImage(&sourceBuffer,
        																			&format,
        																			nil,
        																			image,
        																			vImage_Flags(kvImageNoFlags))
        
        guard error == kvImageNoError else {
        		fatalError("Error in vImageBuffer_InitWithCGImage: \(error)")
        }
        
        // modern
        let sourceBuffer = try? vImage_Buffer(cgImage: image)
        
        // format을 명시적으로 만드는 initializer
        let format = vImage_CGImageFormat(cgImage: image)!
        
        let sourceBuffer = try? vImage_Buffer(cgImage: image,
        																			format: format)
        
      • 버퍼를 이미지화

        // classic
        let cgImage = vImageCreateCGImageFromBuffer(
        	&sourceBuffer,
        	&format,
        	nil,
        	nil,
        	vImage_Flags(kvImageNoFlags),
        	&error)
        
        // modern
        let cgImage = try? sourceBuffer.createCGImage(format: format)
        
      • vImage의 Converting기능

        • Core Graphics(CMYK) ↔ Core Graphics(RGB)
        • Core Video ↔ Core Graphics
        // CMYK -> RGB
        
        // classic
        let cmykToRgbUnmanagedConverter = vImageConverter_CreateWithCGImageFormat(
        	&cmykSourceImageFormat,
        	&rgbDestinationImageFormat,
        	nil,
        	vImage_Flags(kvImageNoFlags),
        	nil
        )
        
        guard let cmykToRgbConverter = cmykToRgbUnmanagedConverter?.takeRetainedValue() else {
        	return
        }
        
        vImageConvert_AnyToAny(cmykToRgbConverter,
        											&cmykSourceBuffer,
        											&rgbDestinationBuffer,
        											nil,
        											vImage_Flags(kvImageNoFlags))
        
        // modern
        let converter = try? vImageConverter.make(sourceFormat: cmykSourceImageFormat,
        																					destinationFormat: rgbDestinationImageFormat)
        
        try? converter?.convert(source: cmykSourceBuffer,
        												destination: &rgbDestinationBuffer)
        
        • CVImageFormat 다루기
        // channelCount구하기
        
        // classic
        let cvImageFormat = vImageCVImageFormat_CreateWithCVPixelBuffer(pixelBuffer).takeRetainedValue()
        
        let cvImageFormatPointer = UnsafeMutableRawPointer.allocate(
        			byteCount: MemoryLayout<vImageCVImageFormat>.size,
        			alignment: MemoryLayout<vImageCVImageFormat>.alignment)
        
        cvImageFormatPointer.storeBytes(of: cvImageFormat,
        																as: vImageCVImageFormat.self)
        
        let cvConstImageFormat = cvImageFormatPointer.load(as: vImageConstCVImageFormat.self)
        
        let channelCount = vImageCVImageFormat_GetChannelCount(cvConstImageFormat)
        
        // modern
        let cvImageFormat = vImageCVImageFormat.make(buffer: pixelBuffer)
        
        let channelCount = cvImageFormat?.channelCount
        
  • Linpack 벤치마크

    • LAPack 기반
    • LAPack은 BLAS 기반
    • BLAS는 SGEMM(Single precision General Matrix Multiply)라는 matrix solver를 기반으로 수많은 행렬 연산을 수행한다. → 그래서 SGEMM 수행능력을 성능 측정의 기준으로 삼을 수 있다.