Adding APIs in arm dma mapping code to map a scatterlist in iommu domain
and get dma address. Allocators outside dma-mapping code like ION could
allocate pages and devices would need to map them for iommu and obtain a
linear dma address. Intention is to re-use the IOVA managment code and "mapping"
across allocators.
Can the above requirement be done without adding new APIs?
APIs available to map an sglist (arm_dma_map_sg) does not provide a linear
dma address.
Signed-off-by: Nishanth Peethambaran <nishanth(a)broadcom.com>
---
arch/arm/include/asm/dma-mapping.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/arm/include/asm/dma-mapping.h
b/arch/arm/include/asm/dma-mapping.h
index 5b579b9..f9e2f6c 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -268,6 +268,16 @@ extern void arm_dma_sync_sg_for_device(struct
device *, struct scatterlist *, in
extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
struct dma_attrs *attrs);
+/*
+ * Map scatterlist pages for the device and return a dma address
+ */
+extern dma_addr_t arm_dma_map_sgtable(struct device *dev, struct sgtable *sgt,
+ enum dma_data_direction dir, struct dma_attrs *attrs);
+/*
+ * Unmap the dma address
+ */
+extern void arm_dma_unmap(struct device *, dma_addr_t iova, int size,
+ enum dma_data_direction dir, struct dma_attrs *attrs);
#endif /* __KERNEL__ */
#endif
--
1.7.9.5
This will allow me to call functions that have multiple arguments if fastpath fails.
This is required to support ticket mutexes, because they need to be able to pass an
extra argument to the fail function.
Originally I duplicated the functions, by adding __mutex_fastpath_lock_retval_arg.
This ended up being just a duplication of the existing function, so a way to test
if fastpath was called ended up being better.
This also cleaned up the reservation mutex patch some by being able to call an
atomic_set instead of atomic_xchg, and making it easier to detect if the wrong
unlock function was previously used.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst(a)canonical.com>
---
arch/ia64/include/asm/mutex.h | 10 ++++------
arch/powerpc/include/asm/mutex.h | 10 ++++------
arch/sh/include/asm/mutex-llsc.h | 4 ++--
arch/x86/include/asm/mutex_32.h | 11 ++++-------
arch/x86/include/asm/mutex_64.h | 11 ++++-------
include/asm-generic/mutex-dec.h | 10 ++++------
include/asm-generic/mutex-null.h | 2 +-
include/asm-generic/mutex-xchg.h | 10 ++++------
kernel/mutex.c | 32 ++++++++++++++------------------
9 files changed, 41 insertions(+), 59 deletions(-)
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index bed73a6..f41e66d 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -29,17 +29,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
- return fail_fn(count);
+ return -1;
return 0;
}
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 5399f7e..127ab23 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -82,17 +82,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(__mutex_dec_return_lock(count) < 0))
- return fail_fn(count);
+ return -1;
return 0;
}
diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h
index 090358a..dad29b6 100644
--- a/arch/sh/include/asm/mutex-llsc.h
+++ b/arch/sh/include/asm/mutex-llsc.h
@@ -37,7 +37,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
}
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
{
int __done, __res;
@@ -51,7 +51,7 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
: "t");
if (unlikely(!__done || __res != 0))
- __res = fail_fn(count);
+ __res = -1;
return __res;
}
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 03f90c8..b7f6b34 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -42,17 +42,14 @@ do { \
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or 1 otherwise.
*/
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
- int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
- return fail_fn(count);
+ return -1;
else
return 0;
}
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 68a87b0..2c543ff 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -37,17 +37,14 @@ do { \
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
-static inline int __mutex_fastpath_lock_retval(atomic_t *count,
- int (*fail_fn)(atomic_t *))
+static inline int __mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
- return fail_fn(count);
+ return -1;
else
return 0;
}
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index f104af7..d4f9fb4 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -28,17 +28,15 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns.
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_dec_return(count) < 0))
- return fail_fn(count);
+ return -1;
return 0;
}
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
index e1bbbc7..efd6206 100644
--- a/include/asm-generic/mutex-null.h
+++ b/include/asm-generic/mutex-null.h
@@ -11,7 +11,7 @@
#define _ASM_GENERIC_MUTEX_NULL_H
#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count)
-#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count)
+#define __mutex_fastpath_lock_retval(count, fail_fn) (-1)
#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count)
#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
#define __mutex_slowpath_needs_to_unlock() 1
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index c04e0db..f169ec0 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -39,18 +39,16 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
* __mutex_fastpath_lock_retval - try to take the lock by moving the count
* from 1 to a 0 value
* @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
*
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
- * or anything the slow path function returns
+ * Change the count from 1 to a value lower than 1. This function returns 0
+ * if the fastpath succeeds, or -1 otherwise.
*/
static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+__mutex_fastpath_lock_retval(atomic_t *count)
{
if (unlikely(atomic_xchg(count, 0) != 1))
if (likely(atomic_xchg(count, -1) != 1))
- return fail_fn(count);
+ return -1;
return 0;
}
diff --git a/kernel/mutex.c b/kernel/mutex.c
index a307cc9..5ac4522 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -350,10 +350,10 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
* mutex_lock_interruptible() and mutex_trylock().
*/
static noinline int __sched
-__mutex_lock_killable_slowpath(atomic_t *lock_count);
+__mutex_lock_killable_slowpath(struct mutex *lock);
static noinline int __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
+__mutex_lock_interruptible_slowpath(struct mutex *lock);
/**
* mutex_lock_interruptible - acquire the mutex, interruptible
@@ -371,12 +371,12 @@ int __sched mutex_lock_interruptible(struct mutex *lock)
int ret;
might_sleep();
- ret = __mutex_fastpath_lock_retval
- (&lock->count, __mutex_lock_interruptible_slowpath);
- if (!ret)
+ ret = __mutex_fastpath_lock_retval(&lock->count);
+ if (likely(!ret)) {
mutex_set_owner(lock);
-
- return ret;
+ return 0;
+ } else
+ return __mutex_lock_interruptible_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_interruptible);
@@ -386,12 +386,12 @@ int __sched mutex_lock_killable(struct mutex *lock)
int ret;
might_sleep();
- ret = __mutex_fastpath_lock_retval
- (&lock->count, __mutex_lock_killable_slowpath);
- if (!ret)
+ ret = __mutex_fastpath_lock_retval(&lock->count);
+ if (likely(!ret)) {
mutex_set_owner(lock);
-
- return ret;
+ return 0;
+ } else
+ return __mutex_lock_killable_slowpath(lock);
}
EXPORT_SYMBOL(mutex_lock_killable);
@@ -404,18 +404,14 @@ __mutex_lock_slowpath(atomic_t *lock_count)
}
static noinline int __sched
-__mutex_lock_killable_slowpath(atomic_t *lock_count)
+__mutex_lock_killable_slowpath(struct mutex *lock)
{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
+__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
#endif
On Tue, Feb 5, 2013 at 2:27 PM, Laurent Pinchart
<laurent.pinchart(a)ideasonboard.com> wrote:
> Hello,
>
> We've hosted a CDF meeting at the FOSDEM on Sunday morning. Here's a summary
> of the discussions.
>
> I would like to start with a big thank to UrLab, the ULB university hacker
> space, for providing us with a meeting room.
>
> The meeting would of course not have been successful without the wide range of
> participants, so I also want to thank all the people who woke up on Sunday
> morning to attend the meeting :-)
>
> (The CC list is pretty long, please let me know - by private e-mail in order
> not to spam the list - if you would like not to receive future CDF-related e-
> mails directly)
>
> 0. Abbreviations
> ----------------
>
> DBI - Display Bus Interface, a parallel video control and data bus that
> transmits data using parallel data, read/write, chip select and address
> signals, similarly to 8051-style microcontroller parallel busses. This is a
> mixed video control and data bus.
>
> DPI - Display Pixel Interface, a parallel video data bus that transmits data
> using parallel data, h/v sync and clock signals. This is a video data bus
> only.
>
> DSI - Display Serial Interface, a serial video control and data bus that
> transmits data using one or more differential serial lines. This is a mixed
> video control and data bus.
>
> DT - Device Tree, a representation of a hardware system as a tree of physical
> devices with associated properties.
>
> SFI - Simple Firmware Interface, a lightweight method for firmware to export
> static tables to the operating system. Those tables can contain display device
> topology information.
>
> VBT - Video BIOS Table, a block of data residing in the video BIOS that can
> contain display device topology information.
>
> 1. Goals
> --------
>
> The meeting started with a brief discussion about the CDF goals.
>
> Tomi Valkeinin and Tomasz Figa have sent RFC patches to show their views of
> what CDF could/should be. Many others have provided very valuable feedback.
> Given the early development stage propositions were sometimes contradictory,
> and focused on different areas of interest. We have thus started the meeting
> with a discussion about what CDF should try to achieve, and what it shouldn't.
>
> CDF has two main purposes. The original goal was to support display panels in
> a platform- and subsystem-independent way. While mostly useful for embedded
> systems, the emergence of platforms such as Intel Medfield and ARM-based PCs
> that blends the embedded and PC worlds makes panel support useful for the PC
> world as well.
>
> The second purpose is to provide a cross-subsystem interface to support video
> encoders. The idea originally came from a generalisation of the original RFC
> that supported panels only. While encoder support is considered as lower
> priority than display panel support by developers focussed on display
> controller driver (Intel, Renesas, ST Ericsson, TI), companies that produce
> video encoders (Analog Devices, and likely others) don't share that point of
> view and would like to provide a single encoder driver that can be used in
> both KMS and V4L2 drivers.
>
> Both display panels and encoders are thus the target of a lot of attention,
> depending on the audience. As long as none of them is forgotten in CDF, the
> overall agreement was that focussing on panels first is acceptable. Care shall
> be taken in that case to avoid any architecture that would make encoders
> support difficult or impossible.
>
> 2. Subsystems
> -------------
>
> Display panels are used in conjunction with FBDEV and KMS drivers. There was
> to the audience knowledge no V4L2 driver that needs to explicitly handle
> display panels. Even though at least one V4L2 output drivers (omap_vout) can
> output video to a display panel, it does so in conjunction with the KMS and/or
> FBDEV APIs that handle panel configuration. Panels are thus not exposed to
> V4L2 drivers.
>
> Encoders, on the other hand, are widely used in the V4L2 subsystem. Many V4L2
> devices output video in either analog (Composite, S-Video, VGA) or digital
> (DVI, HDMI) way.
>
> Display panel drivers don't need to be shared with the V4L2 subsystem.
> Furthermore, as the general opinion during the meeting was that the FBDEV
> subsystem should be considered as legacy and deprecate in the future,
> restricting panel support to KMS hasn't been considered by anyone as an issue.
> KMS will thus be the main target of display panel support in CDF, and FBDEV
> will be supported if that doesn't bring any drawback from an architecture
> point of view.
>
> Encoder drivers need to be shared with the V4L2 subsystem. Similarly to panel
> drivers, excluding FBDEV support from CDF isn't considered as an issue.
>
> 3. KMS Extensions
> -----------------
>
> The usefulness of V4L2 for output devices was questioned, and the possibility
> of using KMS for complex video devices usually associated with V4L2 was
> raised. The TI DaVinci 8xxx family is an example of chips that could benefit
> from KMS support.
>
> The KMS API is lacking support for deep-pipelining ("framebuffers" that are
> sourced from a data stream instead of a memory buffer) today. Extending the
> KMS API with deep-pipelining support was considered as a sensible goal that
> would mostly require the creation of a new KMS source object. Exposing the
> topology of the whole device would then be handled by the Media Controller
> API.
>
> Given that no evidence of this KMS extension being ready in a reasonable time
> frame exists, sharing encoder drivers with the V4L2 subsystem hasn't been
> seriously questioned.
>
> 4. Discovery and Initialization
> -------------------------------
>
> As CDF will split support for complete display devices across different
> drivers, the question of physical devices discovery and initialization caused
> concern among the audience.
>
> Topology and connectivity information can come from a wide variety of sources.
> Embedded platforms typically provide that information in platform data
> supplied by board code or through the device tree. PC platforms usually store
> the information in the firmware exposed through ACPI, SFI, VBT or other
> interfaces. Pluggable devices (PCI being the most common case) can also store
> the information on an on-board non-volatile memory or hardcode it in drivers.
>
> When using the device tree display entity information are bundled with the
> display entity device DT node. The associated driver shall thus extract itself
> information from the DT node. In all other cases the display entity driver
> shall not parse data from the information source directly, but shall instead
> received a platform data structure filled with data parsed by the display
> controller driver. In the most complex case a machine driver, similar to ASoC
> machine drivers, might be needed, in which case platform data could be
> provided by that machine driver.
>
> Display entity drivers are encouraged to internally fill a platform data
> structure from their DT node to reuse the same code path for both platform
> data- and DT-based initialization.
>
> 5. Bus Model
> ------------
>
> Display panels are connected to a video bus that transmits video data and
> optionally to a control bus. Those two busses can be separate physical
> interfaces or combined into a single physical interface.
>
> The Linux device model represents the system as a tree of devices (not to be
> confused by the device tree, abreviated as DT). The tree is organized around
> control busses, with every device being a child of its control bus master. For
> instance an I2C device will be a child of its I2C controller device, which can
> itself be a child of its parent PCI device.
>
> Display panels will be represented as Linux devices. They will have a single
> parent from the Linux device model point of view, but will be potentially
> connected to multiple physical busses. CDF thus needs to define what bus to
> select as the Linux parent bus.
>
> In theory any physical bus that the device is attached to can be selected as
> the parent bus. However, selecting a video data bus would depart from the
> traditional Linux device model that uses control busses only. This caused
> concern among several people who argued that not presenting the device to the
> kernel as attached to its control bus would bring issues in embedded system.
> Unlike on PC systems where the control bus master is usually the same physical
> device as the data bus master, embedded systems are made of a potentially
> complex assembly of completely unrelated devices. Not representing an I2C-
> controlled panel as a child of its I2C master in DT was thus frown upon, even
> though no clear agreement was reached on the subject.
>
> Panels can be divided in three categories based on their bus model.
>
> - No control bus
>
> Many panels don't offer any control interface. They are usually referred to as
> 'dumb panels' as they directly display the data received on their video bus
> without any configurable option. Panels in this category often use DPI is
> their video bus, but other options such as DSI (using the DSI video mode only)
> are possible.
>
> Panels with no control bus can be represented in the device model as platform
> devices, or as being attached to their video bus. In the later case we would
> need Linux busses for pure video data interfaces such as DPI or VGA. Nobody
> was particularly enthousiastic about this idea. Dumb panels will thus likely
> be represented as platform devices.
>
> - Separate video and control busses
>
> The typical case is a panel connected to an I2C or SPI bus that receives data
> through a DPI video interface or DSI video mode interface.
>
> Using a mixed control and video bus (such as DSI and DBI) for control only
> with a different bus for video data is possible in theory but very unlikely in
> practice (although the creativity of hardware developers should never be
> underestimated).
>
> Display panels that use a control bus supported by the Linux kernel should
> likely be represented as children of their control bus master. Other options
> are possible as mentioned above but were received without enthousiasm by most
> embedded kernel developers.
>
> When the control bus isn't supported by the kernel, a new bus type can be
> developed, or the panel can be represented as a platform device. The right
> option will likely very depending on the control bus.
>
> - Combined video and control busses
>
> When the two busses are combined in a single physical bus the panel device
> will obviously be represented as a child of that single physical bus.
>
> In such cases the control bus could expose video bus control methods. This
> would remove the need for a video source as proposed by Tomi Valkeinen in his
> CDF model. However, if the bus can be used for video data transfer in
> combination with a different control bus, a video source corresponding to the
> data bus will be needed.
>
> No decision has been taken on whether to use a video source in addition to the
> control bus in the combined busses case. Experimentation will be needed, and
> the right solution might depend on the bus type.
>
> - Multiple control busses
>
> One panel was mentioned as being connected to a DSI bus and an I2C bus. The
> DSI bus is used for both control and video, and the I2C bus for control only.
> configuring the panel requires sending commands through both DSI and I2C. The
> opinion on such panels was a large *sigh* followed by a "this should be
> handled by the device core, let's ask Greg KH".
>
> 6. Miscellaneous
> ----------------
>
> - If the OMAP3 DSS driver is used as a model for the DSI support
> implementation, Daniel Vetter requested the DSI bus lock semaphore to be
> killed as it prevents lockdep from working correctly (reference needed ;-)).
>
> - Do we need to support chaining several encoders ? We can come up with
> several theoretical use cases, some of them probably exist in real hardware,
> but the details are still a bit fuzzy.
So, a part which is completely omitted in this thread is how to handle
suspend/resume ordering. If you have multiple encoders which need to
be turned on/off in a given order at suspend/resume, how do you handle
that given the current scheme where they are just separate platform
drivers in drivers/video?
This problems occurs with drm/exynos in current 3.8 kernels for
example. On that platform, the DP driver and the FIMD driver will
suspend/resume in random order, and therefore fail resuming half the
time. Is there something which could be done in CDF to address that?
Stéphane
Hi,
While working with high-order page allocations (using `alloc_pages') I've
encountered some issues* with certain APIs and wanted to get a better
understanding of support for those APIs with high-order pages on ARM. In
short, I'm trying to give userspace access to those pages by using
`vm_insert_page' in an mmap handler. Without further ado, some
questions:
o vm_insert_page doesn't seem to work with high-order pages (it
eventually calls __flush_dcache_page which assumes pages of size
PAGE_SIZE). Is this analysis correct or am I missing something?
Things work fine if I use `remap_pfn_range' instead of
`vm_insert_page'. Things also seem to work if I use `vm_insert_page'
with an array of struct page * of size PAGE_SIZE (derived from the
high-order pages by picking out the PAGE_SIZE pages with
nth_page)...
o There's a comment in __dma_alloc (dma-alloc.c) to the effect that
__GFP_COMP is not supported on ARM. Is this true? The commit that
introduced this comment (ea2e7057) was actually ported from avr32
(3611553ef) so I'm curious about the basis for this claim...
I've tried pages of order 8 and order 4. The gfp flags I'm passing to
`alloc_pages' are (GFP_KERNEL | __GFP_HIGHMEM | __GFP_COMP).
Thanks!
* Some issues = in userspace mmap the buffer whose underlying mmap
handler is the one mentioned above, memset that to something and then
immediately check that the bytes are equal to whatever we just memset.
(With huge pages and vm_insert_page this test fails).
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation
Add an iterator to walk through a scatter list a page at a time starting
at a specific page offset. As opposed to the mapping iterator this is
meant to be small, performing well even in simple loops like collecting
all pages on the scatterlist into an array or setting up an iommu table
based on the pages' DMA address.
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
include/linux/scatterlist.h | 48 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
[ Resending with proper email addresses. ]
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 4bd6c06..d22851c 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -231,6 +231,54 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
*/
#define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist))
+struct sg_page_iter {
+ struct scatterlist *sg;
+ int sg_pgoffset;
+ struct page *page;
+};
+
+static inline int
+sg_page_cnt(struct scatterlist *sg)
+{
+ BUG_ON(sg->offset || sg->length & ~PAGE_MASK);
+
+ return sg->length >> PAGE_SHIFT;
+}
+
+static inline void
+sg_page_iter_next(struct sg_page_iter *iter)
+{
+ while (iter->sg && iter->sg_pgoffset >= sg_page_cnt(iter->sg)) {
+ iter->sg_pgoffset -= sg_page_cnt(iter->sg);
+ iter->sg = sg_next(iter->sg);
+ }
+
+ if (iter->sg) {
+ iter->page = nth_page(sg_page(iter->sg), iter->sg_pgoffset);
+ iter->sg_pgoffset++;
+ }
+}
+
+static inline void
+sg_page_iter_start(struct sg_page_iter *iter, struct scatterlist *sglist,
+ unsigned long pgoffset)
+{
+ iter->sg = sglist;
+ iter->sg_pgoffset = pgoffset;
+ iter->page = NULL;
+
+ sg_page_iter_next(iter);
+}
+
+/*
+ * Simple sg page iterator, starting off at the given page offset. Each entry
+ * on the sglist must start at offset 0 and can contain only full pages.
+ * iter->page will point to the current page, iter->sg_pgoffset to the page
+ * offset within the sg holding that page.
+ */
+#define for_each_sg_page(sglist, iter, pgoffset) \
+ for (sg_page_iter_start((iter), (sglist), (pgoffset)); \
+ (iter)->sg; sg_page_iter_next(iter))
/*
* Mapping sg iterator
--
1.7.9.5
Hi Imre!
On Sat, Feb 09, 2013 at 05:27:33PM +0200, Imre Deak wrote:
> Add a helper to walk through a scatter list a page at a time. Needed by
> upcoming patches fixing the scatter list walking logic in the i915 driver.
Nice patch, but I think this would make a rather nice addition to the
common scatterlist api in scatterlist.h, maybe called sg_page_iter.
There's already another helper which does cpu mappings, but it has a
different use-case (gives you the page mapped, which we don't neeed and
can cope with not page-aligned sg tables). With dma-buf using sg tables I
expect more users of such a sg page iterator to pop up. Most possible
users of this will hang around on linaro-mm-sig, so please also cc that
besides the usual suspects.
Cheers, Daniel
>
> Signed-off-by: Imre Deak <imre.deak(a)intel.com>
> ---
> include/drm/drmP.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 44 insertions(+)
>
> diff --git a/include/drm/drmP.h b/include/drm/drmP.h
> index fad21c9..0c0c213 100644
> --- a/include/drm/drmP.h
> +++ b/include/drm/drmP.h
> @@ -1578,6 +1578,50 @@ extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
> extern int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request);
> extern int drm_sg_free(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +struct drm_sg_iter {
> + struct scatterlist *sg;
> + int sg_offset;
Imo using sg_pfn_offset (i.e. sg_offset >> PAGE_SIZE) would make it
clearer that this is all about iterating page-aligned sg tables.
> + struct page *page;
> +};
> +
> +static inline int
> +__drm_sg_iter_seek(struct drm_sg_iter *iter)
> +{
> + while (iter->sg && iter->sg_offset >= iter->sg->length) {
> + iter->sg_offset -= iter->sg->length;
> + iter->sg = sg_next(iter->sg);
And adding a WARN_ON(sg->legnth & ~PAGE_MASK); here would enforce that.
> + }
> +
> + return iter->sg ? 0 : -1;
> +}
> +
> +static inline struct page *
> +drm_sg_iter_next(struct drm_sg_iter *iter)
> +{
> + struct page *page;
> +
> + if (__drm_sg_iter_seek(iter))
> + return NULL;
> +
> + page = nth_page(sg_page(iter->sg), iter->sg_offset >> PAGE_SHIFT);
> + iter->sg_offset = (iter->sg_offset + PAGE_SIZE) & PAGE_MASK;
> +
> + return page;
> +}
> +
> +static inline struct page *
> +drm_sg_iter_start(struct drm_sg_iter *iter, struct scatterlist *sg,
> + unsigned long offset)
> +{
> + iter->sg = sg;
> + iter->sg_offset = offset;
> +
> + return drm_sg_iter_next(iter);
> +}
> +
> +#define drm_for_each_sg_page(iter, sg, pgoffset) \
> + for ((iter)->page = drm_sg_iter_start((iter), (sg), (pgoffset));\
> + (iter)->page; (iter)->page = drm_sg_iter_next(iter))
Again, for the initialization I'd go with page numbers, not an offset in
bytes.
>
> /* ATI PCIGART support (ati_pcigart.h) */
> extern int drm_ati_pcigart_init(struct drm_device *dev,
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx(a)lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch