diff options
author | Samuel Ortiz <sameo@openedhand.com> | 2008-09-10 15:37:14 +0000 |
---|---|---|
committer | Samuel Ortiz <sameo@openedhand.com> | 2008-09-10 15:37:14 +0000 |
commit | 4f82c5078e46fa0799038df33ad1cd624cf54bde (patch) | |
tree | 0f7015a79d98b8469116903261390b91c9b5cef3 /meta | |
parent | f680c855cf7dd3a3481a28671bdf918dbfb551df (diff) | |
download | openembedded-core-4f82c5078e46fa0799038df33ad1cd624cf54bde.tar.gz openembedded-core-4f82c5078e46fa0799038df33ad1cd624cf54bde.tar.bz2 openembedded-core-4f82c5078e46fa0799038df33ad1cd624cf54bde.tar.xz openembedded-core-4f82c5078e46fa0799038df33ad1cd624cf54bde.zip |
linux-moblin2: Add moblin2 kernel
git-svn-id: https://svn.o-hand.com/repos/poky/trunk@5166 311d38ba-8fff-0310-9ca6-ca027cbcb966
Diffstat (limited to 'meta')
15 files changed, 29499 insertions, 0 deletions
diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0001_Export_shmem_file_setup_for_DRM-GEM.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0001_Export_shmem_file_setup_for_DRM-GEM.patch new file mode 100644 index 000000000..9589838af --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0001_Export_shmem_file_setup_for_DRM-GEM.patch @@ -0,0 +1,27 @@ +From: Keith Packard <keithp@keithp.com> +Date: Fri, 20 Jun 2008 07:08:06 +0000 (-0700) +Subject: Export shmem_file_setup for DRM-GEM +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=350ea3ece12744ae154bbc2ea13da6ba84ca5515 + +Export shmem_file_setup for DRM-GEM + +GEM needs to create shmem files to back buffer objects. Though currently +creation of files for objects could have been driven from userland, the +modesetting work will require allocation of buffer objects before userland +is running, for boot-time message display. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2582,6 +2582,7 @@ put_memory: + shmem_unacct_size(flags, size); + return ERR_PTR(error); + } ++EXPORT_SYMBOL(shmem_file_setup); + + /** + * shmem_zero_setup - setup a shared anonymous mapping + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0002_i915.Use_more_consistent_names_for_regs.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0002_i915.Use_more_consistent_names_for_regs.patch new file mode 100644 index 000000000..9a035b544 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0002_i915.Use_more_consistent_names_for_regs.patch @@ -0,0 +1,2739 @@ +From: Jesse Barnes <jbarnes@virtuousgeek.org> +Date: Tue, 29 Jul 2008 18:54:06 +0000 (-0700) +Subject: i915: Use more consistent names for regs, and store them in a separate file. +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=db1cbbd8c4d42e58e9acb3e7af59ad1bb238260d + +i915: Use more consistent names for regs, and store them in a separate file. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -40,11 +40,11 @@ int i915_wait_ring(struct drm_device * d + { + drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_ring_buffer_t *ring = &(dev_priv->ring); +- u32 last_head = I915_READ(LP_RING + RING_HEAD) & HEAD_ADDR; ++ u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + int i; + + for (i = 0; i < 10000; i++) { +- ring->head = I915_READ(LP_RING + RING_HEAD) & HEAD_ADDR; ++ ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; +@@ -67,8 +67,8 @@ void i915_kernel_lost_context(struct drm + drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_ring_buffer_t *ring = &(dev_priv->ring); + +- ring->head = I915_READ(LP_RING + RING_HEAD) & HEAD_ADDR; +- ring->tail = I915_READ(LP_RING + RING_TAIL) & TAIL_ADDR; ++ ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; ++ ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; +@@ -98,13 +98,13 @@ static int i915_dma_cleanup(struct drm_d + drm_pci_free(dev, dev_priv->status_page_dmah); + dev_priv->status_page_dmah = NULL; + /* Need to rewrite hardware status page */ +- I915_WRITE(0x02080, 0x1ffff000); ++ I915_WRITE(HWS_PGA, 0x1ffff000); + } + + if (dev_priv->status_gfx_addr) { + dev_priv->status_gfx_addr = 0; + drm_core_ioremapfree(&dev_priv->hws_map, dev); +- I915_WRITE(0x2080, 0x1ffff000); ++ I915_WRITE(HWS_PGA, 0x1ffff000); + } + + return 0; +@@ -170,7 +170,7 @@ static int i915_initialize(struct drm_de + dev_priv->dma_status_page = dev_priv->status_page_dmah->busaddr; + + memset(dev_priv->hw_status_page, 0, PAGE_SIZE); +- I915_WRITE(0x02080, dev_priv->dma_status_page); ++ I915_WRITE(HWS_PGA, dev_priv->dma_status_page); + } + DRM_DEBUG("Enabled hardware status page\n"); + return 0; +@@ -201,9 +201,9 @@ static int i915_dma_resume(struct drm_de + DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page); + + if (dev_priv->status_gfx_addr != 0) +- I915_WRITE(0x02080, dev_priv->status_gfx_addr); ++ I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); + else +- I915_WRITE(0x02080, dev_priv->dma_status_page); ++ I915_WRITE(HWS_PGA, dev_priv->dma_status_page); + DRM_DEBUG("Enabled hardware status page\n"); + + return 0; +@@ -402,8 +402,8 @@ static void i915_emit_breadcrumb(struct + dev_priv->sarea_priv->last_enqueue = dev_priv->counter = 1; + + BEGIN_LP_RING(4); +- OUT_RING(CMD_STORE_DWORD_IDX); +- OUT_RING(20); ++ OUT_RING(MI_STORE_DWORD_INDEX); ++ OUT_RING(5 << MI_STORE_DWORD_INDEX_SHIFT); + OUT_RING(dev_priv->counter); + OUT_RING(0); + ADVANCE_LP_RING(); +@@ -505,7 +505,7 @@ static int i915_dispatch_flip(struct drm + i915_kernel_lost_context(dev); + + BEGIN_LP_RING(2); +- OUT_RING(INST_PARSER_CLIENT | INST_OP_FLUSH | INST_FLUSH_MAP_CACHE); ++ OUT_RING(MI_FLUSH | MI_READ_FLUSH); + OUT_RING(0); + ADVANCE_LP_RING(); + +@@ -530,8 +530,8 @@ static int i915_dispatch_flip(struct drm + dev_priv->sarea_priv->last_enqueue = dev_priv->counter++; + + BEGIN_LP_RING(4); +- OUT_RING(CMD_STORE_DWORD_IDX); +- OUT_RING(20); ++ OUT_RING(MI_STORE_DWORD_INDEX); ++ OUT_RING(5 << MI_STORE_DWORD_INDEX_SHIFT); + OUT_RING(dev_priv->counter); + OUT_RING(0); + ADVANCE_LP_RING(); +@@ -728,8 +728,8 @@ static int i915_set_status_page(struct d + dev_priv->hw_status_page = dev_priv->hws_map.handle; + + memset(dev_priv->hw_status_page, 0, PAGE_SIZE); +- I915_WRITE(0x02080, dev_priv->status_gfx_addr); +- DRM_DEBUG("load hws 0x2080 with gfx mem 0x%x\n", ++ I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); ++ DRM_DEBUG("load hws HWS_PGA with gfx mem 0x%x\n", + dev_priv->status_gfx_addr); + DRM_DEBUG("load hws at %p\n", dev_priv->hw_status_page); + return 0; +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -279,13 +279,13 @@ static int i915_suspend(struct drm_devic + dev_priv->saveDSPASTRIDE = I915_READ(DSPASTRIDE); + dev_priv->saveDSPASIZE = I915_READ(DSPASIZE); + dev_priv->saveDSPAPOS = I915_READ(DSPAPOS); +- dev_priv->saveDSPABASE = I915_READ(DSPABASE); ++ dev_priv->saveDSPAADDR = I915_READ(DSPAADDR); + if (IS_I965G(dev)) { + dev_priv->saveDSPASURF = I915_READ(DSPASURF); + dev_priv->saveDSPATILEOFF = I915_READ(DSPATILEOFF); + } + i915_save_palette(dev, PIPE_A); +- dev_priv->savePIPEASTAT = I915_READ(I915REG_PIPEASTAT); ++ dev_priv->savePIPEASTAT = I915_READ(PIPEASTAT); + + /* Pipe & plane B info */ + dev_priv->savePIPEBCONF = I915_READ(PIPEBCONF); +@@ -307,13 +307,13 @@ static int i915_suspend(struct drm_devic + dev_priv->saveDSPBSTRIDE = I915_READ(DSPBSTRIDE); + dev_priv->saveDSPBSIZE = I915_READ(DSPBSIZE); + dev_priv->saveDSPBPOS = I915_READ(DSPBPOS); +- dev_priv->saveDSPBBASE = I915_READ(DSPBBASE); ++ dev_priv->saveDSPBADDR = I915_READ(DSPBADDR); + if (IS_I965GM(dev) || IS_IGD_GM(dev)) { + dev_priv->saveDSPBSURF = I915_READ(DSPBSURF); + dev_priv->saveDSPBTILEOFF = I915_READ(DSPBTILEOFF); + } + i915_save_palette(dev, PIPE_B); +- dev_priv->savePIPEBSTAT = I915_READ(I915REG_PIPEBSTAT); ++ dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); + + /* CRT state */ + dev_priv->saveADPA = I915_READ(ADPA); +@@ -328,9 +328,9 @@ static int i915_suspend(struct drm_devic + dev_priv->saveLVDS = I915_READ(LVDS); + if (!IS_I830(dev) && !IS_845G(dev)) + dev_priv->savePFIT_CONTROL = I915_READ(PFIT_CONTROL); +- dev_priv->saveLVDSPP_ON = I915_READ(LVDSPP_ON); +- dev_priv->saveLVDSPP_OFF = I915_READ(LVDSPP_OFF); +- dev_priv->savePP_CYCLE = I915_READ(PP_CYCLE); ++ dev_priv->savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS); ++ dev_priv->savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS); ++ dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR); + + /* FIXME: save TV & SDVO state */ + +@@ -341,19 +341,19 @@ static int i915_suspend(struct drm_devic + dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL); + + /* Interrupt state */ +- dev_priv->saveIIR = I915_READ(I915REG_INT_IDENTITY_R); +- dev_priv->saveIER = I915_READ(I915REG_INT_ENABLE_R); +- dev_priv->saveIMR = I915_READ(I915REG_INT_MASK_R); ++ dev_priv->saveIIR = I915_READ(IIR); ++ dev_priv->saveIER = I915_READ(IER); ++ dev_priv->saveIMR = I915_READ(IMR); + + /* VGA state */ +- dev_priv->saveVCLK_DIVISOR_VGA0 = I915_READ(VCLK_DIVISOR_VGA0); +- dev_priv->saveVCLK_DIVISOR_VGA1 = I915_READ(VCLK_DIVISOR_VGA1); +- dev_priv->saveVCLK_POST_DIV = I915_READ(VCLK_POST_DIV); ++ dev_priv->saveVGA0 = I915_READ(VGA0); ++ dev_priv->saveVGA1 = I915_READ(VGA1); ++ dev_priv->saveVGA_PD = I915_READ(VGA_PD); + dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); + + /* Clock gating state */ + dev_priv->saveD_STATE = I915_READ(D_STATE); +- dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); ++ dev_priv->saveCG_2D_DIS = I915_READ(CG_2D_DIS); + + /* Cache mode state */ + dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); +@@ -363,7 +363,7 @@ static int i915_suspend(struct drm_devic + + /* Scratch space */ + for (i = 0; i < 16; i++) { +- dev_priv->saveSWF0[i] = I915_READ(SWF0 + (i << 2)); ++ dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2)); + dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2)); + } + for (i = 0; i < 3; i++) +@@ -424,7 +424,7 @@ static int i915_resume(struct drm_device + I915_WRITE(DSPASIZE, dev_priv->saveDSPASIZE); + I915_WRITE(DSPAPOS, dev_priv->saveDSPAPOS); + I915_WRITE(PIPEASRC, dev_priv->savePIPEASRC); +- I915_WRITE(DSPABASE, dev_priv->saveDSPABASE); ++ I915_WRITE(DSPAADDR, dev_priv->saveDSPAADDR); + I915_WRITE(DSPASTRIDE, dev_priv->saveDSPASTRIDE); + if (IS_I965G(dev)) { + I915_WRITE(DSPASURF, dev_priv->saveDSPASURF); +@@ -436,7 +436,7 @@ static int i915_resume(struct drm_device + i915_restore_palette(dev, PIPE_A); + /* Enable the plane */ + I915_WRITE(DSPACNTR, dev_priv->saveDSPACNTR); +- I915_WRITE(DSPABASE, I915_READ(DSPABASE)); ++ I915_WRITE(DSPAADDR, I915_READ(DSPAADDR)); + + /* Pipe & plane B info */ + if (dev_priv->saveDPLL_B & DPLL_VCO_ENABLE) { +@@ -466,7 +466,7 @@ static int i915_resume(struct drm_device + I915_WRITE(DSPBSIZE, dev_priv->saveDSPBSIZE); + I915_WRITE(DSPBPOS, dev_priv->saveDSPBPOS); + I915_WRITE(PIPEBSRC, dev_priv->savePIPEBSRC); +- I915_WRITE(DSPBBASE, dev_priv->saveDSPBBASE); ++ I915_WRITE(DSPBADDR, dev_priv->saveDSPBADDR); + I915_WRITE(DSPBSTRIDE, dev_priv->saveDSPBSTRIDE); + if (IS_I965G(dev)) { + I915_WRITE(DSPBSURF, dev_priv->saveDSPBSURF); +@@ -478,7 +478,7 @@ static int i915_resume(struct drm_device + i915_restore_palette(dev, PIPE_B); + /* Enable the plane */ + I915_WRITE(DSPBCNTR, dev_priv->saveDSPBCNTR); +- I915_WRITE(DSPBBASE, I915_READ(DSPBBASE)); ++ I915_WRITE(DSPBADDR, I915_READ(DSPBADDR)); + + /* CRT state */ + I915_WRITE(ADPA, dev_priv->saveADPA); +@@ -493,9 +493,9 @@ static int i915_resume(struct drm_device + + I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS); + I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL); +- I915_WRITE(LVDSPP_ON, dev_priv->saveLVDSPP_ON); +- I915_WRITE(LVDSPP_OFF, dev_priv->saveLVDSPP_OFF); +- I915_WRITE(PP_CYCLE, dev_priv->savePP_CYCLE); ++ I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS); ++ I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS); ++ I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR); + I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL); + + /* FIXME: restore TV & SDVO state */ +@@ -508,14 +508,14 @@ static int i915_resume(struct drm_device + + /* VGA state */ + I915_WRITE(VGACNTRL, dev_priv->saveVGACNTRL); +- I915_WRITE(VCLK_DIVISOR_VGA0, dev_priv->saveVCLK_DIVISOR_VGA0); +- I915_WRITE(VCLK_DIVISOR_VGA1, dev_priv->saveVCLK_DIVISOR_VGA1); +- I915_WRITE(VCLK_POST_DIV, dev_priv->saveVCLK_POST_DIV); ++ I915_WRITE(VGA0, dev_priv->saveVGA0); ++ I915_WRITE(VGA1, dev_priv->saveVGA1); ++ I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); + udelay(150); + + /* Clock gating state */ + I915_WRITE (D_STATE, dev_priv->saveD_STATE); +- I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D); ++ I915_WRITE(CG_2D_DIS, dev_priv->saveCG_2D_DIS); + + /* Cache mode state */ + I915_WRITE (CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000); +@@ -524,7 +524,7 @@ static int i915_resume(struct drm_device + I915_WRITE (MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000); + + for (i = 0; i < 16; i++) { +- I915_WRITE(SWF0 + (i << 2), dev_priv->saveSWF0[i]); ++ I915_WRITE(SWF00 + (i << 2), dev_priv->saveSWF0[i]); + I915_WRITE(SWF10 + (i << 2), dev_priv->saveSWF1[i+7]); + } + for (i = 0; i < 3; i++) +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -30,6 +30,8 @@ + #ifndef _I915_DRV_H_ + #define _I915_DRV_H_ + ++#include "i915_reg.h" ++ + /* General customization: + */ + +@@ -138,7 +140,7 @@ typedef struct drm_i915_private { + u32 saveDSPASTRIDE; + u32 saveDSPASIZE; + u32 saveDSPAPOS; +- u32 saveDSPABASE; ++ u32 saveDSPAADDR; + u32 saveDSPASURF; + u32 saveDSPATILEOFF; + u32 savePFIT_PGM_RATIOS; +@@ -159,24 +161,24 @@ typedef struct drm_i915_private { + u32 saveDSPBSTRIDE; + u32 saveDSPBSIZE; + u32 saveDSPBPOS; +- u32 saveDSPBBASE; ++ u32 saveDSPBADDR; + u32 saveDSPBSURF; + u32 saveDSPBTILEOFF; +- u32 saveVCLK_DIVISOR_VGA0; +- u32 saveVCLK_DIVISOR_VGA1; +- u32 saveVCLK_POST_DIV; ++ u32 saveVGA0; ++ u32 saveVGA1; ++ u32 saveVGA_PD; + u32 saveVGACNTRL; + u32 saveADPA; + u32 saveLVDS; +- u32 saveLVDSPP_ON; +- u32 saveLVDSPP_OFF; ++ u32 savePP_ON_DELAYS; ++ u32 savePP_OFF_DELAYS; + u32 saveDVOA; + u32 saveDVOB; + u32 saveDVOC; + u32 savePP_ON; + u32 savePP_OFF; + u32 savePP_CONTROL; +- u32 savePP_CYCLE; ++ u32 savePP_DIVISOR; + u32 savePFIT_CONTROL; + u32 save_palette_a[256]; + u32 save_palette_b[256]; +@@ -189,7 +191,7 @@ typedef struct drm_i915_private { + u32 saveIMR; + u32 saveCACHE_MODE_0; + u32 saveD_STATE; +- u32 saveDSPCLK_GATE_D; ++ u32 saveCG_2D_DIS; + u32 saveMI_ARB_STATE; + u32 saveSWF0[16]; + u32 saveSWF1[16]; +@@ -283,816 +285,26 @@ extern void i915_mem_release(struct drm_ + if (I915_VERBOSE) DRM_DEBUG("ADVANCE_LP_RING %x\n", outring); \ + dev_priv->ring.tail = outring; \ + dev_priv->ring.space -= outcount * 4; \ +- I915_WRITE(LP_RING + RING_TAIL, outring); \ ++ I915_WRITE(PRB0_TAIL, outring); \ + } while(0) + +-extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); +- +-/* Extended config space */ +-#define LBB 0xf4 +- +-/* VGA stuff */ +- +-#define VGA_ST01_MDA 0x3ba +-#define VGA_ST01_CGA 0x3da +- +-#define VGA_MSR_WRITE 0x3c2 +-#define VGA_MSR_READ 0x3cc +-#define VGA_MSR_MEM_EN (1<<1) +-#define VGA_MSR_CGA_MODE (1<<0) +- +-#define VGA_SR_INDEX 0x3c4 +-#define VGA_SR_DATA 0x3c5 +- +-#define VGA_AR_INDEX 0x3c0 +-#define VGA_AR_VID_EN (1<<5) +-#define VGA_AR_DATA_WRITE 0x3c0 +-#define VGA_AR_DATA_READ 0x3c1 +- +-#define VGA_GR_INDEX 0x3ce +-#define VGA_GR_DATA 0x3cf +-/* GR05 */ +-#define VGA_GR_MEM_READ_MODE_SHIFT 3 +-#define VGA_GR_MEM_READ_MODE_PLANE 1 +-/* GR06 */ +-#define VGA_GR_MEM_MODE_MASK 0xc +-#define VGA_GR_MEM_MODE_SHIFT 2 +-#define VGA_GR_MEM_A0000_AFFFF 0 +-#define VGA_GR_MEM_A0000_BFFFF 1 +-#define VGA_GR_MEM_B0000_B7FFF 2 +-#define VGA_GR_MEM_B0000_BFFFF 3 +- +-#define VGA_DACMASK 0x3c6 +-#define VGA_DACRX 0x3c7 +-#define VGA_DACWX 0x3c8 +-#define VGA_DACDATA 0x3c9 +- +-#define VGA_CR_INDEX_MDA 0x3b4 +-#define VGA_CR_DATA_MDA 0x3b5 +-#define VGA_CR_INDEX_CGA 0x3d4 +-#define VGA_CR_DATA_CGA 0x3d5 +- +-#define GFX_OP_USER_INTERRUPT ((0<<29)|(2<<23)) +-#define GFX_OP_BREAKPOINT_INTERRUPT ((0<<29)|(1<<23)) +-#define CMD_REPORT_HEAD (7<<23) +-#define CMD_STORE_DWORD_IDX ((0x21<<23) | 0x1) +-#define CMD_OP_BATCH_BUFFER ((0x0<<29)|(0x30<<23)|0x1) +- +-#define INST_PARSER_CLIENT 0x00000000 +-#define INST_OP_FLUSH 0x02000000 +-#define INST_FLUSH_MAP_CACHE 0x00000001 +- +-#define BB1_START_ADDR_MASK (~0x7) +-#define BB1_PROTECTED (1<<0) +-#define BB1_UNPROTECTED (0<<0) +-#define BB2_END_ADDR_MASK (~0x7) +- +-/* Framebuffer compression */ +-#define FBC_CFB_BASE 0x03200 /* 4k page aligned */ +-#define FBC_LL_BASE 0x03204 /* 4k page aligned */ +-#define FBC_CONTROL 0x03208 +-#define FBC_CTL_EN (1<<31) +-#define FBC_CTL_PERIODIC (1<<30) +-#define FBC_CTL_INTERVAL_SHIFT (16) +-#define FBC_CTL_UNCOMPRESSIBLE (1<<14) +-#define FBC_CTL_STRIDE_SHIFT (5) +-#define FBC_CTL_FENCENO (1<<0) +-#define FBC_COMMAND 0x0320c +-#define FBC_CMD_COMPRESS (1<<0) +-#define FBC_STATUS 0x03210 +-#define FBC_STAT_COMPRESSING (1<<31) +-#define FBC_STAT_COMPRESSED (1<<30) +-#define FBC_STAT_MODIFIED (1<<29) +-#define FBC_STAT_CURRENT_LINE (1<<0) +-#define FBC_CONTROL2 0x03214 +-#define FBC_CTL_FENCE_DBL (0<<4) +-#define FBC_CTL_IDLE_IMM (0<<2) +-#define FBC_CTL_IDLE_FULL (1<<2) +-#define FBC_CTL_IDLE_LINE (2<<2) +-#define FBC_CTL_IDLE_DEBUG (3<<2) +-#define FBC_CTL_CPU_FENCE (1<<1) +-#define FBC_CTL_PLANEA (0<<0) +-#define FBC_CTL_PLANEB (1<<0) +-#define FBC_FENCE_OFF 0x0321b +- +-#define FBC_LL_SIZE (1536) +-#define FBC_LL_PAD (32) +- +-/* Interrupt bits: +- */ +-#define USER_INT_FLAG (1<<1) +-#define VSYNC_PIPEB_FLAG (1<<5) +-#define VSYNC_PIPEA_FLAG (1<<7) +-#define HWB_OOM_FLAG (1<<13) /* binner out of memory */ +- +-#define I915REG_HWSTAM 0x02098 +-#define I915REG_INT_IDENTITY_R 0x020a4 +-#define I915REG_INT_MASK_R 0x020a8 +-#define I915REG_INT_ENABLE_R 0x020a0 +- +-#define I915REG_PIPEASTAT 0x70024 +-#define I915REG_PIPEBSTAT 0x71024 +- +-#define I915_VBLANK_INTERRUPT_ENABLE (1UL<<17) +-#define I915_VBLANK_CLEAR (1UL<<1) +- +-#define SRX_INDEX 0x3c4 +-#define SRX_DATA 0x3c5 +-#define SR01 1 +-#define SR01_SCREEN_OFF (1<<5) +- +-#define PPCR 0x61204 +-#define PPCR_ON (1<<0) +- +-#define DVOB 0x61140 +-#define DVOB_ON (1<<31) +-#define DVOC 0x61160 +-#define DVOC_ON (1<<31) +-#define LVDS 0x61180 +-#define LVDS_ON (1<<31) +- +-#define ADPA 0x61100 +-#define ADPA_DPMS_MASK (~(3<<10)) +-#define ADPA_DPMS_ON (0<<10) +-#define ADPA_DPMS_SUSPEND (1<<10) +-#define ADPA_DPMS_STANDBY (2<<10) +-#define ADPA_DPMS_OFF (3<<10) +- +-#define NOPID 0x2094 +-#define LP_RING 0x2030 +-#define HP_RING 0x2040 +-/* The binner has its own ring buffer: +- */ +-#define HWB_RING 0x2400 +- +-#define RING_TAIL 0x00 +-#define TAIL_ADDR 0x001FFFF8 +-#define RING_HEAD 0x04 +-#define HEAD_WRAP_COUNT 0xFFE00000 +-#define HEAD_WRAP_ONE 0x00200000 +-#define HEAD_ADDR 0x001FFFFC +-#define RING_START 0x08 +-#define START_ADDR 0x0xFFFFF000 +-#define RING_LEN 0x0C +-#define RING_NR_PAGES 0x001FF000 +-#define RING_REPORT_MASK 0x00000006 +-#define RING_REPORT_64K 0x00000002 +-#define RING_REPORT_128K 0x00000004 +-#define RING_NO_REPORT 0x00000000 +-#define RING_VALID_MASK 0x00000001 +-#define RING_VALID 0x00000001 +-#define RING_INVALID 0x00000000 +- +-/* Instruction parser error reg: +- */ +-#define IPEIR 0x2088 +- +-/* Scratch pad debug 0 reg: +- */ +-#define SCPD0 0x209c +- +-/* Error status reg: +- */ +-#define ESR 0x20b8 +- +-/* Secondary DMA fetch address debug reg: +- */ +-#define DMA_FADD_S 0x20d4 +- +-/* Memory Interface Arbitration State +- */ +-#define MI_ARB_STATE 0x20e4 +- +-/* Cache mode 0 reg. +- * - Manipulating render cache behaviour is central +- * to the concept of zone rendering, tuning this reg can help avoid +- * unnecessary render cache reads and even writes (for z/stencil) +- * at beginning and end of scene. +- * +- * - To change a bit, write to this reg with a mask bit set and the +- * bit of interest either set or cleared. EG: (BIT<<16) | BIT to set. +- */ +-#define Cache_Mode_0 0x2120 +-#define CACHE_MODE_0 0x2120 +-#define CM0_MASK_SHIFT 16 +-#define CM0_IZ_OPT_DISABLE (1<<6) +-#define CM0_ZR_OPT_DISABLE (1<<5) +-#define CM0_DEPTH_EVICT_DISABLE (1<<4) +-#define CM0_COLOR_EVICT_DISABLE (1<<3) +-#define CM0_DEPTH_WRITE_DISABLE (1<<1) +-#define CM0_RC_OP_FLUSH_DISABLE (1<<0) +- +- +-/* Graphics flush control. A CPU write flushes the GWB of all writes. +- * The data is discarded. +- */ +-#define GFX_FLSH_CNTL 0x2170 +- +-/* Binner control. Defines the location of the bin pointer list: +- */ +-#define BINCTL 0x2420 +-#define BC_MASK (1 << 9) +- +-/* Binned scene info. +- */ +-#define BINSCENE 0x2428 +-#define BS_OP_LOAD (1 << 8) +-#define BS_MASK (1 << 22) +- +-/* Bin command parser debug reg: +- */ +-#define BCPD 0x2480 +- +-/* Bin memory control debug reg: +- */ +-#define BMCD 0x2484 +- +-/* Bin data cache debug reg: +- */ +-#define BDCD 0x2488 +- +-/* Binner pointer cache debug reg: +- */ +-#define BPCD 0x248c +- +-/* Binner scratch pad debug reg: +- */ +-#define BINSKPD 0x24f0 +- +-/* HWB scratch pad debug reg: +- */ +-#define HWBSKPD 0x24f4 +- +-/* Binner memory pool reg: +- */ +-#define BMP_BUFFER 0x2430 +-#define BMP_PAGE_SIZE_4K (0 << 10) +-#define BMP_BUFFER_SIZE_SHIFT 1 +-#define BMP_ENABLE (1 << 0) +- +-/* Get/put memory from the binner memory pool: +- */ +-#define BMP_GET 0x2438 +-#define BMP_PUT 0x2440 +-#define BMP_OFFSET_SHIFT 5 +- +-/* 3D state packets: +- */ +-#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) +- +-#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +-#define SC_UPDATE_SCISSOR (0x1<<1) +-#define SC_ENABLE_MASK (0x1<<0) +-#define SC_ENABLE (0x1<<0) +- +-#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) +- +-#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) +-#define SCI_YMIN_MASK (0xffff<<16) +-#define SCI_XMIN_MASK (0xffff<<0) +-#define SCI_YMAX_MASK (0xffff<<16) +-#define SCI_XMAX_MASK (0xffff<<0) +- +-#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +-#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) +-#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) +-#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +-#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) +-#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) +-#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) +- +-#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +- +-#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) +-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +-#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +-#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +-#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) +-#define XY_SRC_COPY_BLT_DST_TILED (1<<11) +- +-#define MI_BATCH_BUFFER ((0x30<<23)|1) +-#define MI_BATCH_BUFFER_START (0x31<<23) +-#define MI_BATCH_BUFFER_END (0xA<<23) +-#define MI_BATCH_NON_SECURE (1) +-#define MI_BATCH_NON_SECURE_I965 (1<<8) +- +-#define MI_WAIT_FOR_EVENT ((0x3<<23)) +-#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +-#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +-#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) +- +-#define MI_LOAD_SCAN_LINES_INCL ((0x12<<23)) +- +-#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) +-#define ASYNC_FLIP (1<<22) +-#define DISPLAY_PLANE_A (0<<20) +-#define DISPLAY_PLANE_B (1<<20) +- +-/* Display regs */ +-#define DSPACNTR 0x70180 +-#define DSPBCNTR 0x71180 +-#define DISPPLANE_SEL_PIPE_MASK (1<<24) +- +-/* Define the region of interest for the binner: +- */ +-#define CMD_OP_BIN_CONTROL ((0x3<<29)|(0x1d<<24)|(0x84<<16)|4) +- +-#define CMD_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) +- +-#define CMD_MI_FLUSH (0x04 << 23) +-#define MI_NO_WRITE_FLUSH (1 << 2) +-#define MI_READ_FLUSH (1 << 0) +-#define MI_EXE_FLUSH (1 << 1) +-#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ +-#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ +- +-#define BREADCRUMB_BITS 31 +-#define BREADCRUMB_MASK ((1U << BREADCRUMB_BITS) - 1) +- +-#define READ_BREADCRUMB(dev_priv) (((volatile u32*)(dev_priv->hw_status_page))[5]) +-#define READ_HWSP(dev_priv, reg) (((volatile u32*)(dev_priv->hw_status_page))[reg]) +- +-#define BLC_PWM_CTL 0x61254 +-#define BACKLIGHT_MODULATION_FREQ_SHIFT (17) +- +-#define BLC_PWM_CTL2 0x61250 + /** +- * This is the most significant 15 bits of the number of backlight cycles in a +- * complete cycle of the modulated backlight control. ++ * Reads a dword out of the status page, which is written to from the command ++ * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or ++ * MI_STORE_DATA_IMM. ++ * ++ * The following dwords have a reserved meaning: ++ * 0: ISR copy, updated when an ISR bit not set in the HWSTAM changes. ++ * 4: ring 0 head pointer ++ * 5: ring 1 head pointer (915-class) ++ * 6: ring 2 head pointer (915-class) + * +- * The actual value is this field multiplied by two. ++ * The area from dword 0x10 to 0x3ff is available for driver usage. + */ +-#define BACKLIGHT_MODULATION_FREQ_MASK (0x7fff << 17) +-#define BLM_LEGACY_MODE (1 << 16) +-/** +- * This is the number of cycles out of the backlight modulation cycle for which +- * the backlight is on. +- * +- * This field must be no greater than the number of cycles in the complete +- * backlight modulation cycle. +- */ +-#define BACKLIGHT_DUTY_CYCLE_SHIFT (0) +-#define BACKLIGHT_DUTY_CYCLE_MASK (0xffff) +- +-#define I915_GCFGC 0xf0 +-#define I915_LOW_FREQUENCY_ENABLE (1 << 7) +-#define I915_DISPLAY_CLOCK_190_200_MHZ (0 << 4) +-#define I915_DISPLAY_CLOCK_333_MHZ (4 << 4) +-#define I915_DISPLAY_CLOCK_MASK (7 << 4) +- +-#define I855_HPLLCC 0xc0 +-#define I855_CLOCK_CONTROL_MASK (3 << 0) +-#define I855_CLOCK_133_200 (0 << 0) +-#define I855_CLOCK_100_200 (1 << 0) +-#define I855_CLOCK_100_133 (2 << 0) +-#define I855_CLOCK_166_250 (3 << 0) +- +-/* p317, 319 +- */ +-#define VCLK2_VCO_M 0x6008 /* treat as 16 bit? (includes msbs) */ +-#define VCLK2_VCO_N 0x600a +-#define VCLK2_VCO_DIV_SEL 0x6012 +- +-#define VCLK_DIVISOR_VGA0 0x6000 +-#define VCLK_DIVISOR_VGA1 0x6004 +-#define VCLK_POST_DIV 0x6010 +-/** Selects a post divisor of 4 instead of 2. */ +-# define VGA1_PD_P2_DIV_4 (1 << 15) +-/** Overrides the p2 post divisor field */ +-# define VGA1_PD_P1_DIV_2 (1 << 13) +-# define VGA1_PD_P1_SHIFT 8 +-/** P1 value is 2 greater than this field */ +-# define VGA1_PD_P1_MASK (0x1f << 8) +-/** Selects a post divisor of 4 instead of 2. */ +-# define VGA0_PD_P2_DIV_4 (1 << 7) +-/** Overrides the p2 post divisor field */ +-# define VGA0_PD_P1_DIV_2 (1 << 5) +-# define VGA0_PD_P1_SHIFT 0 +-/** P1 value is 2 greater than this field */ +-# define VGA0_PD_P1_MASK (0x1f << 0) +- +-/* PCI D state control register */ +-#define D_STATE 0x6104 +-#define DSPCLK_GATE_D 0x6200 +- +-/* I830 CRTC registers */ +-#define HTOTAL_A 0x60000 +-#define HBLANK_A 0x60004 +-#define HSYNC_A 0x60008 +-#define VTOTAL_A 0x6000c +-#define VBLANK_A 0x60010 +-#define VSYNC_A 0x60014 +-#define PIPEASRC 0x6001c +-#define BCLRPAT_A 0x60020 +-#define VSYNCSHIFT_A 0x60028 +- +-#define HTOTAL_B 0x61000 +-#define HBLANK_B 0x61004 +-#define HSYNC_B 0x61008 +-#define VTOTAL_B 0x6100c +-#define VBLANK_B 0x61010 +-#define VSYNC_B 0x61014 +-#define PIPEBSRC 0x6101c +-#define BCLRPAT_B 0x61020 +-#define VSYNCSHIFT_B 0x61028 +- +-#define PP_STATUS 0x61200 +-# define PP_ON (1 << 31) +-/** +- * Indicates that all dependencies of the panel are on: +- * +- * - PLL enabled +- * - pipe enabled +- * - LVDS/DVOB/DVOC on +- */ +-# define PP_READY (1 << 30) +-# define PP_SEQUENCE_NONE (0 << 28) +-# define PP_SEQUENCE_ON (1 << 28) +-# define PP_SEQUENCE_OFF (2 << 28) +-# define PP_SEQUENCE_MASK 0x30000000 +-#define PP_CONTROL 0x61204 +-# define POWER_TARGET_ON (1 << 0) +- +-#define LVDSPP_ON 0x61208 +-#define LVDSPP_OFF 0x6120c +-#define PP_CYCLE 0x61210 +- +-#define PFIT_CONTROL 0x61230 +-# define PFIT_ENABLE (1 << 31) +-# define PFIT_PIPE_MASK (3 << 29) +-# define PFIT_PIPE_SHIFT 29 +-# define VERT_INTERP_DISABLE (0 << 10) +-# define VERT_INTERP_BILINEAR (1 << 10) +-# define VERT_INTERP_MASK (3 << 10) +-# define VERT_AUTO_SCALE (1 << 9) +-# define HORIZ_INTERP_DISABLE (0 << 6) +-# define HORIZ_INTERP_BILINEAR (1 << 6) +-# define HORIZ_INTERP_MASK (3 << 6) +-# define HORIZ_AUTO_SCALE (1 << 5) +-# define PANEL_8TO6_DITHER_ENABLE (1 << 3) +- +-#define PFIT_PGM_RATIOS 0x61234 +-# define PFIT_VERT_SCALE_MASK 0xfff00000 +-# define PFIT_HORIZ_SCALE_MASK 0x0000fff0 +- +-#define PFIT_AUTO_RATIOS 0x61238 +- +- +-#define DPLL_A 0x06014 +-#define DPLL_B 0x06018 +-# define DPLL_VCO_ENABLE (1 << 31) +-# define DPLL_DVO_HIGH_SPEED (1 << 30) +-# define DPLL_SYNCLOCK_ENABLE (1 << 29) +-# define DPLL_VGA_MODE_DIS (1 << 28) +-# define DPLLB_MODE_DAC_SERIAL (1 << 26) /* i915 */ +-# define DPLLB_MODE_LVDS (2 << 26) /* i915 */ +-# define DPLL_MODE_MASK (3 << 26) +-# define DPLL_DAC_SERIAL_P2_CLOCK_DIV_10 (0 << 24) /* i915 */ +-# define DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 (1 << 24) /* i915 */ +-# define DPLLB_LVDS_P2_CLOCK_DIV_14 (0 << 24) /* i915 */ +-# define DPLLB_LVDS_P2_CLOCK_DIV_7 (1 << 24) /* i915 */ +-# define DPLL_P2_CLOCK_DIV_MASK 0x03000000 /* i915 */ +-# define DPLL_FPA01_P1_POST_DIV_MASK 0x00ff0000 /* i915 */ +-/** +- * The i830 generation, in DAC/serial mode, defines p1 as two plus this +- * bitfield, or just 2 if PLL_P1_DIVIDE_BY_TWO is set. +- */ +-# define DPLL_FPA01_P1_POST_DIV_MASK_I830 0x001f0000 +-/** +- * The i830 generation, in LVDS mode, defines P1 as the bit number set within +- * this field (only one bit may be set). +- */ +-# define DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS 0x003f0000 +-# define DPLL_FPA01_P1_POST_DIV_SHIFT 16 +-# define PLL_P2_DIVIDE_BY_4 (1 << 23) /* i830, required in DVO non-gang */ +-# define PLL_P1_DIVIDE_BY_TWO (1 << 21) /* i830 */ +-# define PLL_REF_INPUT_DREFCLK (0 << 13) +-# define PLL_REF_INPUT_TVCLKINA (1 << 13) /* i830 */ +-# define PLL_REF_INPUT_TVCLKINBC (2 << 13) /* SDVO TVCLKIN */ +-# define PLLB_REF_INPUT_SPREADSPECTRUMIN (3 << 13) +-# define PLL_REF_INPUT_MASK (3 << 13) +-# define PLL_LOAD_PULSE_PHASE_SHIFT 9 +-/* +- * Parallel to Serial Load Pulse phase selection. +- * Selects the phase for the 10X DPLL clock for the PCIe +- * digital display port. The range is 4 to 13; 10 or more +- * is just a flip delay. The default is 6 +- */ +-# define PLL_LOAD_PULSE_PHASE_MASK (0xf << PLL_LOAD_PULSE_PHASE_SHIFT) +-# define DISPLAY_RATE_SELECT_FPA1 (1 << 8) +- +-/** +- * SDVO multiplier for 945G/GM. Not used on 965. +- * +- * \sa DPLL_MD_UDI_MULTIPLIER_MASK +- */ +-# define SDVO_MULTIPLIER_MASK 0x000000ff +-# define SDVO_MULTIPLIER_SHIFT_HIRES 4 +-# define SDVO_MULTIPLIER_SHIFT_VGA 0 +- +-/** @defgroup DPLL_MD +- * @{ +- */ +-/** Pipe A SDVO/UDI clock multiplier/divider register for G965. */ +-#define DPLL_A_MD 0x0601c +-/** Pipe B SDVO/UDI clock multiplier/divider register for G965. */ +-#define DPLL_B_MD 0x06020 +-/** +- * UDI pixel divider, controlling how many pixels are stuffed into a packet. +- * +- * Value is pixels minus 1. Must be set to 1 pixel for SDVO. +- */ +-# define DPLL_MD_UDI_DIVIDER_MASK 0x3f000000 +-# define DPLL_MD_UDI_DIVIDER_SHIFT 24 +-/** UDI pixel divider for VGA, same as DPLL_MD_UDI_DIVIDER_MASK. */ +-# define DPLL_MD_VGA_UDI_DIVIDER_MASK 0x003f0000 +-# define DPLL_MD_VGA_UDI_DIVIDER_SHIFT 16 +-/** +- * SDVO/UDI pixel multiplier. +- * +- * SDVO requires that the bus clock rate be between 1 and 2 Ghz, and the bus +- * clock rate is 10 times the DPLL clock. At low resolution/refresh rate +- * modes, the bus rate would be below the limits, so SDVO allows for stuffing +- * dummy bytes in the datastream at an increased clock rate, with both sides of +- * the link knowing how many bytes are fill. +- * +- * So, for a mode with a dotclock of 65Mhz, we would want to double the clock +- * rate to 130Mhz to get a bus rate of 1.30Ghz. The DPLL clock rate would be +- * set to 130Mhz, and the SDVO multiplier set to 2x in this register and +- * through an SDVO command. +- * +- * This register field has values of multiplication factor minus 1, with +- * a maximum multiplier of 5 for SDVO. +- */ +-# define DPLL_MD_UDI_MULTIPLIER_MASK 0x00003f00 +-# define DPLL_MD_UDI_MULTIPLIER_SHIFT 8 +-/** SDVO/UDI pixel multiplier for VGA, same as DPLL_MD_UDI_MULTIPLIER_MASK. +- * This best be set to the default value (3) or the CRT won't work. No, +- * I don't entirely understand what this does... +- */ +-# define DPLL_MD_VGA_UDI_MULTIPLIER_MASK 0x0000003f +-# define DPLL_MD_VGA_UDI_MULTIPLIER_SHIFT 0 +-/** @} */ +- +-#define DPLL_TEST 0x606c +-# define DPLLB_TEST_SDVO_DIV_1 (0 << 22) +-# define DPLLB_TEST_SDVO_DIV_2 (1 << 22) +-# define DPLLB_TEST_SDVO_DIV_4 (2 << 22) +-# define DPLLB_TEST_SDVO_DIV_MASK (3 << 22) +-# define DPLLB_TEST_N_BYPASS (1 << 19) +-# define DPLLB_TEST_M_BYPASS (1 << 18) +-# define DPLLB_INPUT_BUFFER_ENABLE (1 << 16) +-# define DPLLA_TEST_N_BYPASS (1 << 3) +-# define DPLLA_TEST_M_BYPASS (1 << 2) +-# define DPLLA_INPUT_BUFFER_ENABLE (1 << 0) +- +-#define ADPA 0x61100 +-#define ADPA_DAC_ENABLE (1<<31) +-#define ADPA_DAC_DISABLE 0 +-#define ADPA_PIPE_SELECT_MASK (1<<30) +-#define ADPA_PIPE_A_SELECT 0 +-#define ADPA_PIPE_B_SELECT (1<<30) +-#define ADPA_USE_VGA_HVPOLARITY (1<<15) +-#define ADPA_SETS_HVPOLARITY 0 +-#define ADPA_VSYNC_CNTL_DISABLE (1<<11) +-#define ADPA_VSYNC_CNTL_ENABLE 0 +-#define ADPA_HSYNC_CNTL_DISABLE (1<<10) +-#define ADPA_HSYNC_CNTL_ENABLE 0 +-#define ADPA_VSYNC_ACTIVE_HIGH (1<<4) +-#define ADPA_VSYNC_ACTIVE_LOW 0 +-#define ADPA_HSYNC_ACTIVE_HIGH (1<<3) +-#define ADPA_HSYNC_ACTIVE_LOW 0 +- +-#define FPA0 0x06040 +-#define FPA1 0x06044 +-#define FPB0 0x06048 +-#define FPB1 0x0604c +-# define FP_N_DIV_MASK 0x003f0000 +-# define FP_N_DIV_SHIFT 16 +-# define FP_M1_DIV_MASK 0x00003f00 +-# define FP_M1_DIV_SHIFT 8 +-# define FP_M2_DIV_MASK 0x0000003f +-# define FP_M2_DIV_SHIFT 0 +- +- +-#define PORT_HOTPLUG_EN 0x61110 +-# define SDVOB_HOTPLUG_INT_EN (1 << 26) +-# define SDVOC_HOTPLUG_INT_EN (1 << 25) +-# define TV_HOTPLUG_INT_EN (1 << 18) +-# define CRT_HOTPLUG_INT_EN (1 << 9) +-# define CRT_HOTPLUG_FORCE_DETECT (1 << 3) +- +-#define PORT_HOTPLUG_STAT 0x61114 +-# define CRT_HOTPLUG_INT_STATUS (1 << 11) +-# define TV_HOTPLUG_INT_STATUS (1 << 10) +-# define CRT_HOTPLUG_MONITOR_MASK (3 << 8) +-# define CRT_HOTPLUG_MONITOR_COLOR (3 << 8) +-# define CRT_HOTPLUG_MONITOR_MONO (2 << 8) +-# define CRT_HOTPLUG_MONITOR_NONE (0 << 8) +-# define SDVOC_HOTPLUG_INT_STATUS (1 << 7) +-# define SDVOB_HOTPLUG_INT_STATUS (1 << 6) +- +-#define SDVOB 0x61140 +-#define SDVOC 0x61160 +-#define SDVO_ENABLE (1 << 31) +-#define SDVO_PIPE_B_SELECT (1 << 30) +-#define SDVO_STALL_SELECT (1 << 29) +-#define SDVO_INTERRUPT_ENABLE (1 << 26) +-/** +- * 915G/GM SDVO pixel multiplier. +- * +- * Programmed value is multiplier - 1, up to 5x. +- * +- * \sa DPLL_MD_UDI_MULTIPLIER_MASK +- */ +-#define SDVO_PORT_MULTIPLY_MASK (7 << 23) +-#define SDVO_PORT_MULTIPLY_SHIFT 23 +-#define SDVO_PHASE_SELECT_MASK (15 << 19) +-#define SDVO_PHASE_SELECT_DEFAULT (6 << 19) +-#define SDVO_CLOCK_OUTPUT_INVERT (1 << 18) +-#define SDVOC_GANG_MODE (1 << 16) +-#define SDVO_BORDER_ENABLE (1 << 7) +-#define SDVOB_PCIE_CONCURRENCY (1 << 3) +-#define SDVO_DETECTED (1 << 2) +-/* Bits to be preserved when writing */ +-#define SDVOB_PRESERVE_MASK ((1 << 17) | (1 << 16) | (1 << 14)) +-#define SDVOC_PRESERVE_MASK (1 << 17) +- +-/** @defgroup LVDS +- * @{ +- */ +-/** +- * This register controls the LVDS output enable, pipe selection, and data +- * format selection. +- * +- * All of the clock/data pairs are force powered down by power sequencing. +- */ +-#define LVDS 0x61180 +-/** +- * Enables the LVDS port. This bit must be set before DPLLs are enabled, as +- * the DPLL semantics change when the LVDS is assigned to that pipe. +- */ +-# define LVDS_PORT_EN (1 << 31) +-/** Selects pipe B for LVDS data. Must be set on pre-965. */ +-# define LVDS_PIPEB_SELECT (1 << 30) +- +-/** +- * Enables the A0-A2 data pairs and CLKA, containing 18 bits of color data per +- * pixel. +- */ +-# define LVDS_A0A2_CLKA_POWER_MASK (3 << 8) +-# define LVDS_A0A2_CLKA_POWER_DOWN (0 << 8) +-# define LVDS_A0A2_CLKA_POWER_UP (3 << 8) +-/** +- * Controls the A3 data pair, which contains the additional LSBs for 24 bit +- * mode. Only enabled if LVDS_A0A2_CLKA_POWER_UP also indicates it should be +- * on. +- */ +-# define LVDS_A3_POWER_MASK (3 << 6) +-# define LVDS_A3_POWER_DOWN (0 << 6) +-# define LVDS_A3_POWER_UP (3 << 6) +-/** +- * Controls the CLKB pair. This should only be set when LVDS_B0B3_POWER_UP +- * is set. +- */ +-# define LVDS_CLKB_POWER_MASK (3 << 4) +-# define LVDS_CLKB_POWER_DOWN (0 << 4) +-# define LVDS_CLKB_POWER_UP (3 << 4) +- +-/** +- * Controls the B0-B3 data pairs. This must be set to match the DPLL p2 +- * setting for whether we are in dual-channel mode. The B3 pair will +- * additionally only be powered up when LVDS_A3_POWER_UP is set. +- */ +-# define LVDS_B0B3_POWER_MASK (3 << 2) +-# define LVDS_B0B3_POWER_DOWN (0 << 2) +-# define LVDS_B0B3_POWER_UP (3 << 2) +- +-#define PIPEACONF 0x70008 +-#define PIPEACONF_ENABLE (1<<31) +-#define PIPEACONF_DISABLE 0 +-#define PIPEACONF_DOUBLE_WIDE (1<<30) +-#define I965_PIPECONF_ACTIVE (1<<30) +-#define PIPEACONF_SINGLE_WIDE 0 +-#define PIPEACONF_PIPE_UNLOCKED 0 +-#define PIPEACONF_PIPE_LOCKED (1<<25) +-#define PIPEACONF_PALETTE 0 +-#define PIPEACONF_GAMMA (1<<24) +-#define PIPECONF_FORCE_BORDER (1<<25) +-#define PIPECONF_PROGRESSIVE (0 << 21) +-#define PIPECONF_INTERLACE_W_FIELD_INDICATION (6 << 21) +-#define PIPECONF_INTERLACE_FIELD_0_ONLY (7 << 21) +- +-#define DSPARB 0x70030 +-#define DSPARB_CSTART_MASK (0x7f << 7) +-#define DSPARB_CSTART_SHIFT 7 +-#define DSPARB_BSTART_MASK (0x7f) +-#define DSPARB_BSTART_SHIFT 0 +- +-#define PIPEBCONF 0x71008 +-#define PIPEBCONF_ENABLE (1<<31) +-#define PIPEBCONF_DISABLE 0 +-#define PIPEBCONF_DOUBLE_WIDE (1<<30) +-#define PIPEBCONF_DISABLE 0 +-#define PIPEBCONF_GAMMA (1<<24) +-#define PIPEBCONF_PALETTE 0 +- +-#define PIPEBGCMAXRED 0x71010 +-#define PIPEBGCMAXGREEN 0x71014 +-#define PIPEBGCMAXBLUE 0x71018 +-#define PIPEBSTAT 0x71024 +-#define PIPEBFRAMEHIGH 0x71040 +-#define PIPEBFRAMEPIXEL 0x71044 +- +-#define DSPACNTR 0x70180 +-#define DSPBCNTR 0x71180 +-#define DISPLAY_PLANE_ENABLE (1<<31) +-#define DISPLAY_PLANE_DISABLE 0 +-#define DISPPLANE_GAMMA_ENABLE (1<<30) +-#define DISPPLANE_GAMMA_DISABLE 0 +-#define DISPPLANE_PIXFORMAT_MASK (0xf<<26) +-#define DISPPLANE_8BPP (0x2<<26) +-#define DISPPLANE_15_16BPP (0x4<<26) +-#define DISPPLANE_16BPP (0x5<<26) +-#define DISPPLANE_32BPP_NO_ALPHA (0x6<<26) +-#define DISPPLANE_32BPP (0x7<<26) +-#define DISPPLANE_STEREO_ENABLE (1<<25) +-#define DISPPLANE_STEREO_DISABLE 0 +-#define DISPPLANE_SEL_PIPE_MASK (1<<24) +-#define DISPPLANE_SEL_PIPE_A 0 +-#define DISPPLANE_SEL_PIPE_B (1<<24) +-#define DISPPLANE_SRC_KEY_ENABLE (1<<22) +-#define DISPPLANE_SRC_KEY_DISABLE 0 +-#define DISPPLANE_LINE_DOUBLE (1<<20) +-#define DISPPLANE_NO_LINE_DOUBLE 0 +-#define DISPPLANE_STEREO_POLARITY_FIRST 0 +-#define DISPPLANE_STEREO_POLARITY_SECOND (1<<18) +-/* plane B only */ +-#define DISPPLANE_ALPHA_TRANS_ENABLE (1<<15) +-#define DISPPLANE_ALPHA_TRANS_DISABLE 0 +-#define DISPPLANE_SPRITE_ABOVE_DISPLAYA 0 +-#define DISPPLANE_SPRITE_ABOVE_OVERLAY (1) +- +-#define DSPABASE 0x70184 +-#define DSPASTRIDE 0x70188 +- +-#define DSPBBASE 0x71184 +-#define DSPBADDR DSPBBASE +-#define DSPBSTRIDE 0x71188 +- +-#define DSPAKEYVAL 0x70194 +-#define DSPAKEYMASK 0x70198 +- +-#define DSPAPOS 0x7018C /* reserved */ +-#define DSPASIZE 0x70190 +-#define DSPBPOS 0x7118C +-#define DSPBSIZE 0x71190 +- +-#define DSPASURF 0x7019C +-#define DSPATILEOFF 0x701A4 +- +-#define DSPBSURF 0x7119C +-#define DSPBTILEOFF 0x711A4 +- +-#define VGACNTRL 0x71400 +-# define VGA_DISP_DISABLE (1 << 31) +-# define VGA_2X_MODE (1 << 30) +-# define VGA_PIPE_B_SELECT (1 << 29) +- +-/* +- * Some BIOS scratch area registers. The 845 (and 830?) store the amount +- * of video memory available to the BIOS in SWF1. +- */ +- +-#define SWF0 0x71410 +- +-/* +- * 855 scratch registers. +- */ +-#define SWF10 0x70410 +- +-#define SWF30 0x72414 +- +-/* +- * Overlay registers. These are overlay registers accessed via MMIO. +- * Those loaded via the overlay register page are defined in i830_video.c. +- */ +-#define OVADD 0x30000 +- +-#define DOVSTA 0x30008 +-#define OC_BUF (0x3<<20) ++#define READ_HWSP(dev_priv, reg) (((volatile u32*)(dev_priv->hw_status_page))[reg]) ++#define READ_BREADCRUMB(dev_priv) READ_HWSP(dev_priv, 5) + +-#define OGAMC5 0x30010 +-#define OGAMC4 0x30014 +-#define OGAMC3 0x30018 +-#define OGAMC2 0x3001c +-#define OGAMC1 0x30020 +-#define OGAMC0 0x30024 +-/* +- * Palette registers +- */ +-#define PALETTE_A 0x0a000 +-#define PALETTE_B 0x0a800 ++extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); + + #define IS_I830(dev) ((dev)->pci_device == 0x3577) + #define IS_845G(dev) ((dev)->pci_device == 0x2562) +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -31,10 +31,6 @@ + #include "i915_drm.h" + #include "i915_drv.h" + +-#define USER_INT_FLAG (1<<1) +-#define VSYNC_PIPEB_FLAG (1<<5) +-#define VSYNC_PIPEA_FLAG (1<<7) +- + #define MAX_NOPID ((u32)~0) + + /** +@@ -236,40 +232,43 @@ irqreturn_t i915_driver_irq_handler(DRM_ + u16 temp; + u32 pipea_stats, pipeb_stats; + +- pipea_stats = I915_READ(I915REG_PIPEASTAT); +- pipeb_stats = I915_READ(I915REG_PIPEBSTAT); ++ pipea_stats = I915_READ(PIPEASTAT); ++ pipeb_stats = I915_READ(PIPEBSTAT); + +- temp = I915_READ16(I915REG_INT_IDENTITY_R); ++ temp = I915_READ16(IIR); + +- temp &= (USER_INT_FLAG | VSYNC_PIPEA_FLAG | VSYNC_PIPEB_FLAG); ++ temp &= (I915_USER_INTERRUPT | ++ I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | ++ I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT); + + DRM_DEBUG("%s flag=%08x\n", __FUNCTION__, temp); + + if (temp == 0) + return IRQ_NONE; + +- I915_WRITE16(I915REG_INT_IDENTITY_R, temp); +- (void) I915_READ16(I915REG_INT_IDENTITY_R); ++ I915_WRITE16(IIR, temp); ++ (void) I915_READ16(IIR); + DRM_READMEMORYBARRIER(); + + dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + +- if (temp & USER_INT_FLAG) ++ if (temp & I915_USER_INTERRUPT) + DRM_WAKEUP(&dev_priv->irq_queue); + +- if (temp & (VSYNC_PIPEA_FLAG | VSYNC_PIPEB_FLAG)) { ++ if (temp & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | ++ I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) { + int vblank_pipe = dev_priv->vblank_pipe; + + if ((vblank_pipe & + (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B)) + == (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B)) { +- if (temp & VSYNC_PIPEA_FLAG) ++ if (temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) + atomic_inc(&dev->vbl_received); +- if (temp & VSYNC_PIPEB_FLAG) ++ if (temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) + atomic_inc(&dev->vbl_received2); +- } else if (((temp & VSYNC_PIPEA_FLAG) && ++ } else if (((temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) && + (vblank_pipe & DRM_I915_VBLANK_PIPE_A)) || +- ((temp & VSYNC_PIPEB_FLAG) && ++ ((temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) && + (vblank_pipe & DRM_I915_VBLANK_PIPE_B))) + atomic_inc(&dev->vbl_received); + +@@ -278,12 +277,12 @@ irqreturn_t i915_driver_irq_handler(DRM_ + + if (dev_priv->swaps_pending > 0) + drm_locked_tasklet(dev, i915_vblank_tasklet); +- I915_WRITE(I915REG_PIPEASTAT, ++ I915_WRITE(PIPEASTAT, + pipea_stats|I915_VBLANK_INTERRUPT_ENABLE| +- I915_VBLANK_CLEAR); +- I915_WRITE(I915REG_PIPEBSTAT, ++ PIPE_VBLANK_INTERRUPT_STATUS); ++ I915_WRITE(PIPEBSTAT, + pipeb_stats|I915_VBLANK_INTERRUPT_ENABLE| +- I915_VBLANK_CLEAR); ++ PIPE_VBLANK_INTERRUPT_STATUS); + } + + return IRQ_HANDLED; +@@ -304,12 +303,12 @@ static int i915_emit_irq(struct drm_devi + dev_priv->sarea_priv->last_enqueue = dev_priv->counter = 1; + + BEGIN_LP_RING(6); +- OUT_RING(CMD_STORE_DWORD_IDX); +- OUT_RING(20); ++ OUT_RING(MI_STORE_DWORD_INDEX); ++ OUT_RING(5 << MI_STORE_DWORD_INDEX_SHIFT); + OUT_RING(dev_priv->counter); + OUT_RING(0); + OUT_RING(0); +- OUT_RING(GFX_OP_USER_INTERRUPT); ++ OUT_RING(MI_USER_INTERRUPT); + ADVANCE_LP_RING(); + + return dev_priv->counter; +@@ -421,11 +420,11 @@ static void i915_enable_interrupt (struc + + flag = 0; + if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_A) +- flag |= VSYNC_PIPEA_FLAG; ++ flag |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT; + if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_B) +- flag |= VSYNC_PIPEB_FLAG; ++ flag |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; + +- I915_WRITE16(I915REG_INT_ENABLE_R, USER_INT_FLAG | flag); ++ I915_WRITE16(IER, I915_USER_INTERRUPT | flag); + } + + /* Set the vblank monitor pipe +@@ -465,11 +464,11 @@ int i915_vblank_pipe_get(struct drm_devi + return -EINVAL; + } + +- flag = I915_READ(I915REG_INT_ENABLE_R); ++ flag = I915_READ(IER); + pipe->pipe = 0; +- if (flag & VSYNC_PIPEA_FLAG) ++ if (flag & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) + pipe->pipe |= DRM_I915_VBLANK_PIPE_A; +- if (flag & VSYNC_PIPEB_FLAG) ++ if (flag & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) + pipe->pipe |= DRM_I915_VBLANK_PIPE_B; + + return 0; +@@ -587,9 +586,9 @@ void i915_driver_irq_preinstall(struct d + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + +- I915_WRITE16(I915REG_HWSTAM, 0xfffe); +- I915_WRITE16(I915REG_INT_MASK_R, 0x0); +- I915_WRITE16(I915REG_INT_ENABLE_R, 0x0); ++ I915_WRITE16(HWSTAM, 0xfffe); ++ I915_WRITE16(IMR, 0x0); ++ I915_WRITE16(IER, 0x0); + } + + void i915_driver_irq_postinstall(struct drm_device * dev) +@@ -614,10 +613,10 @@ void i915_driver_irq_uninstall(struct dr + if (!dev_priv) + return; + +- I915_WRITE16(I915REG_HWSTAM, 0xffff); +- I915_WRITE16(I915REG_INT_MASK_R, 0xffff); +- I915_WRITE16(I915REG_INT_ENABLE_R, 0x0); ++ I915_WRITE16(HWSTAM, 0xffff); ++ I915_WRITE16(IMR, 0xffff); ++ I915_WRITE16(IER, 0x0); + +- temp = I915_READ16(I915REG_INT_IDENTITY_R); +- I915_WRITE16(I915REG_INT_IDENTITY_R, temp); ++ temp = I915_READ16(IIR); ++ I915_WRITE16(IIR, temp); + } +--- /dev/null ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -0,0 +1,1405 @@ ++/* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef _I915_REG_H_ ++#define _I915_REG_H_ ++ ++/* MCH MMIO space */ ++/** 915-945 and GM965 MCH register controlling DRAM channel access */ ++#define DCC 0x200 ++#define DCC_ADDRESSING_MODE_SINGLE_CHANNEL (0 << 0) ++#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC (1 << 0) ++#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED (2 << 0) ++#define DCC_ADDRESSING_MODE_MASK (3 << 0) ++#define DCC_CHANNEL_XOR_DISABLE (1 << 10) ++ ++/** 965 MCH register controlling DRAM channel configuration */ ++#define CHDECMISC 0x111 ++#define CHDECMISC_FLEXMEMORY (1 << 1) ++ ++/* ++ * The Bridge device's PCI config space has information about the ++ * fb aperture size and the amount of pre-reserved memory. ++ */ ++#define INTEL_GMCH_CTRL 0x52 ++#define INTEL_GMCH_ENABLED 0x4 ++#define INTEL_GMCH_MEM_MASK 0x1 ++#define INTEL_GMCH_MEM_64M 0x1 ++#define INTEL_GMCH_MEM_128M 0 ++ ++#define INTEL_855_GMCH_GMS_MASK (0x7 << 4) ++#define INTEL_855_GMCH_GMS_DISABLED (0x0 << 4) ++#define INTEL_855_GMCH_GMS_STOLEN_1M (0x1 << 4) ++#define INTEL_855_GMCH_GMS_STOLEN_4M (0x2 << 4) ++#define INTEL_855_GMCH_GMS_STOLEN_8M (0x3 << 4) ++#define INTEL_855_GMCH_GMS_STOLEN_16M (0x4 << 4) ++#define INTEL_855_GMCH_GMS_STOLEN_32M (0x5 << 4) ++ ++#define INTEL_915G_GMCH_GMS_STOLEN_48M (0x6 << 4) ++#define INTEL_915G_GMCH_GMS_STOLEN_64M (0x7 << 4) ++ ++/* PCI config space */ ++ ++#define HPLLCC 0xc0 /* 855 only */ ++#define GC_CLOCK_CONTROL_MASK (3 << 0) ++#define GC_CLOCK_133_200 (0 << 0) ++#define GC_CLOCK_100_200 (1 << 0) ++#define GC_CLOCK_100_133 (2 << 0) ++#define GC_CLOCK_166_250 (3 << 0) ++#define GCFGC 0xf0 /* 915+ only */ ++#define GC_LOW_FREQUENCY_ENABLE (1 << 7) ++#define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) ++#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) ++#define GC_DISPLAY_CLOCK_MASK (7 << 4) ++#define LBB 0xf4 ++ ++/* VGA stuff */ ++ ++#define VGA_ST01_MDA 0x3ba ++#define VGA_ST01_CGA 0x3da ++ ++#define VGA_MSR_WRITE 0x3c2 ++#define VGA_MSR_READ 0x3cc ++#define VGA_MSR_MEM_EN (1<<1) ++#define VGA_MSR_CGA_MODE (1<<0) ++ ++#define VGA_SR_INDEX 0x3c4 ++#define VGA_SR_DATA 0x3c5 ++ ++#define VGA_AR_INDEX 0x3c0 ++#define VGA_AR_VID_EN (1<<5) ++#define VGA_AR_DATA_WRITE 0x3c0 ++#define VGA_AR_DATA_READ 0x3c1 ++ ++#define VGA_GR_INDEX 0x3ce ++#define VGA_GR_DATA 0x3cf ++/* GR05 */ ++#define VGA_GR_MEM_READ_MODE_SHIFT 3 ++#define VGA_GR_MEM_READ_MODE_PLANE 1 ++/* GR06 */ ++#define VGA_GR_MEM_MODE_MASK 0xc ++#define VGA_GR_MEM_MODE_SHIFT 2 ++#define VGA_GR_MEM_A0000_AFFFF 0 ++#define VGA_GR_MEM_A0000_BFFFF 1 ++#define VGA_GR_MEM_B0000_B7FFF 2 ++#define VGA_GR_MEM_B0000_BFFFF 3 ++ ++#define VGA_DACMASK 0x3c6 ++#define VGA_DACRX 0x3c7 ++#define VGA_DACWX 0x3c8 ++#define VGA_DACDATA 0x3c9 ++ ++#define VGA_CR_INDEX_MDA 0x3b4 ++#define VGA_CR_DATA_MDA 0x3b5 ++#define VGA_CR_INDEX_CGA 0x3d4 ++#define VGA_CR_DATA_CGA 0x3d5 ++ ++/* ++ * Memory interface instructions used by the kernel ++ */ ++#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) ++ ++#define MI_NOOP MI_INSTR(0, 0) ++#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) ++#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) ++#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) ++#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) ++#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) ++#define MI_FLUSH MI_INSTR(0x04, 0) ++#define MI_READ_FLUSH (1 << 0) ++#define MI_EXE_FLUSH (1 << 1) ++#define MI_NO_WRITE_FLUSH (1 << 2) ++#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ ++#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ ++#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) ++#define MI_REPORT_HEAD MI_INSTR(0x07, 0) ++#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) ++#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) ++#define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ ++#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) ++#define MI_STORE_DWORD_INDEX_SHIFT 2 ++#define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 1) ++#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) ++#define MI_BATCH_NON_SECURE (1) ++#define MI_BATCH_NON_SECURE_I965 (1<<8) ++#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) ++ ++/* ++ * 3D instructions used by the kernel ++ */ ++#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) ++ ++#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) ++#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) ++#define SC_UPDATE_SCISSOR (0x1<<1) ++#define SC_ENABLE_MASK (0x1<<0) ++#define SC_ENABLE (0x1<<0) ++#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) ++#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) ++#define SCI_YMIN_MASK (0xffff<<16) ++#define SCI_XMIN_MASK (0xffff<<0) ++#define SCI_YMAX_MASK (0xffff<<16) ++#define SCI_XMAX_MASK (0xffff<<0) ++#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) ++#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) ++#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) ++#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) ++#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) ++#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) ++#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) ++#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) ++#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) ++#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) ++#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) ++#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) ++#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) ++#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) ++#define BLT_DEPTH_8 (0<<24) ++#define BLT_DEPTH_16_565 (1<<24) ++#define BLT_DEPTH_16_1555 (2<<24) ++#define BLT_DEPTH_32 (3<<24) ++#define BLT_ROP_GXCOPY (0xcc<<16) ++#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ ++#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ ++#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) ++#define ASYNC_FLIP (1<<22) ++#define DISPLAY_PLANE_A (0<<20) ++#define DISPLAY_PLANE_B (1<<20) ++ ++/* ++ * Instruction and interrupt control regs ++ */ ++ ++#define PRB0_TAIL 0x02030 ++#define PRB0_HEAD 0x02034 ++#define PRB0_START 0x02038 ++#define PRB0_CTL 0x0203c ++#define TAIL_ADDR 0x001FFFF8 ++#define HEAD_WRAP_COUNT 0xFFE00000 ++#define HEAD_WRAP_ONE 0x00200000 ++#define HEAD_ADDR 0x001FFFFC ++#define RING_NR_PAGES 0x001FF000 ++#define RING_REPORT_MASK 0x00000006 ++#define RING_REPORT_64K 0x00000002 ++#define RING_REPORT_128K 0x00000004 ++#define RING_NO_REPORT 0x00000000 ++#define RING_VALID_MASK 0x00000001 ++#define RING_VALID 0x00000001 ++#define RING_INVALID 0x00000000 ++#define PRB1_TAIL 0x02040 /* 915+ only */ ++#define PRB1_HEAD 0x02044 /* 915+ only */ ++#define PRB1_START 0x02048 /* 915+ only */ ++#define PRB1_CTL 0x0204c /* 915+ only */ ++#define ACTHD_I965 0x02074 ++#define HWS_PGA 0x02080 ++#define HWS_ADDRESS_MASK 0xfffff000 ++#define HWS_START_ADDRESS_SHIFT 4 ++#define IPEIR 0x02088 ++#define NOPID 0x02094 ++#define HWSTAM 0x02098 ++#define SCPD0 0x0209c /* 915+ only */ ++#define IER 0x020a0 ++#define IIR 0x020a4 ++#define IMR 0x020a8 ++#define ISR 0x020ac ++#define I915_PIPE_CONTROL_NOTIFY_INTERRUPT (1<<18) ++#define I915_DISPLAY_PORT_INTERRUPT (1<<17) ++#define I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT (1<<15) ++#define I915_GMCH_THERMAL_SENSOR_EVENT_INTERRUPT (1<<14) ++#define I915_HWB_OOM_INTERRUPT (1<<13) ++#define I915_SYNC_STATUS_INTERRUPT (1<<12) ++#define I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT (1<<11) ++#define I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT (1<<10) ++#define I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT (1<<9) ++#define I915_DISPLAY_PLANE_C_FLIP_PENDING_INTERRUPT (1<<8) ++#define I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT (1<<7) ++#define I915_DISPLAY_PIPE_A_EVENT_INTERRUPT (1<<6) ++#define I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT (1<<5) ++#define I915_DISPLAY_PIPE_B_EVENT_INTERRUPT (1<<4) ++#define I915_DEBUG_INTERRUPT (1<<2) ++#define I915_USER_INTERRUPT (1<<1) ++#define I915_ASLE_INTERRUPT (1<<0) ++#define EIR 0x020b0 ++#define EMR 0x020b4 ++#define ESR 0x020b8 ++#define INSTPM 0x020c0 ++#define ACTHD 0x020c8 ++#define FW_BLC 0x020d8 ++#define FW_BLC_SELF 0x020e0 /* 915+ only */ ++#define MI_ARB_STATE 0x020e4 /* 915+ only */ ++#define CACHE_MODE_0 0x02120 /* 915+ only */ ++#define CM0_MASK_SHIFT 16 ++#define CM0_IZ_OPT_DISABLE (1<<6) ++#define CM0_ZR_OPT_DISABLE (1<<5) ++#define CM0_DEPTH_EVICT_DISABLE (1<<4) ++#define CM0_COLOR_EVICT_DISABLE (1<<3) ++#define CM0_DEPTH_WRITE_DISABLE (1<<1) ++#define CM0_RC_OP_FLUSH_DISABLE (1<<0) ++#define GFX_FLSH_CNTL 0x02170 /* 915+ only */ ++ ++/* ++ * Framebuffer compression (915+ only) ++ */ ++ ++#define FBC_CFB_BASE 0x03200 /* 4k page aligned */ ++#define FBC_LL_BASE 0x03204 /* 4k page aligned */ ++#define FBC_CONTROL 0x03208 ++#define FBC_CTL_EN (1<<31) ++#define FBC_CTL_PERIODIC (1<<30) ++#define FBC_CTL_INTERVAL_SHIFT (16) ++#define FBC_CTL_UNCOMPRESSIBLE (1<<14) ++#define FBC_CTL_STRIDE_SHIFT (5) ++#define FBC_CTL_FENCENO (1<<0) ++#define FBC_COMMAND 0x0320c ++#define FBC_CMD_COMPRESS (1<<0) ++#define FBC_STATUS 0x03210 ++#define FBC_STAT_COMPRESSING (1<<31) ++#define FBC_STAT_COMPRESSED (1<<30) ++#define FBC_STAT_MODIFIED (1<<29) ++#define FBC_STAT_CURRENT_LINE (1<<0) ++#define FBC_CONTROL2 0x03214 ++#define FBC_CTL_FENCE_DBL (0<<4) ++#define FBC_CTL_IDLE_IMM (0<<2) ++#define FBC_CTL_IDLE_FULL (1<<2) ++#define FBC_CTL_IDLE_LINE (2<<2) ++#define FBC_CTL_IDLE_DEBUG (3<<2) ++#define FBC_CTL_CPU_FENCE (1<<1) ++#define FBC_CTL_PLANEA (0<<0) ++#define FBC_CTL_PLANEB (1<<0) ++#define FBC_FENCE_OFF 0x0321b ++ ++#define FBC_LL_SIZE (1536) ++ ++/* ++ * GPIO regs ++ */ ++#define GPIOA 0x5010 ++#define GPIOB 0x5014 ++#define GPIOC 0x5018 ++#define GPIOD 0x501c ++#define GPIOE 0x5020 ++#define GPIOF 0x5024 ++#define GPIOG 0x5028 ++#define GPIOH 0x502c ++# define GPIO_CLOCK_DIR_MASK (1 << 0) ++# define GPIO_CLOCK_DIR_IN (0 << 1) ++# define GPIO_CLOCK_DIR_OUT (1 << 1) ++# define GPIO_CLOCK_VAL_MASK (1 << 2) ++# define GPIO_CLOCK_VAL_OUT (1 << 3) ++# define GPIO_CLOCK_VAL_IN (1 << 4) ++# define GPIO_CLOCK_PULLUP_DISABLE (1 << 5) ++# define GPIO_DATA_DIR_MASK (1 << 8) ++# define GPIO_DATA_DIR_IN (0 << 9) ++# define GPIO_DATA_DIR_OUT (1 << 9) ++# define GPIO_DATA_VAL_MASK (1 << 10) ++# define GPIO_DATA_VAL_OUT (1 << 11) ++# define GPIO_DATA_VAL_IN (1 << 12) ++# define GPIO_DATA_PULLUP_DISABLE (1 << 13) ++ ++/* ++ * Clock control & power management ++ */ ++ ++#define VGA0 0x6000 ++#define VGA1 0x6004 ++#define VGA_PD 0x6010 ++#define VGA0_PD_P2_DIV_4 (1 << 7) ++#define VGA0_PD_P1_DIV_2 (1 << 5) ++#define VGA0_PD_P1_SHIFT 0 ++#define VGA0_PD_P1_MASK (0x1f << 0) ++#define VGA1_PD_P2_DIV_4 (1 << 15) ++#define VGA1_PD_P1_DIV_2 (1 << 13) ++#define VGA1_PD_P1_SHIFT 8 ++#define VGA1_PD_P1_MASK (0x1f << 8) ++#define DPLL_A 0x06014 ++#define DPLL_B 0x06018 ++#define DPLL_VCO_ENABLE (1 << 31) ++#define DPLL_DVO_HIGH_SPEED (1 << 30) ++#define DPLL_SYNCLOCK_ENABLE (1 << 29) ++#define DPLL_VGA_MODE_DIS (1 << 28) ++#define DPLLB_MODE_DAC_SERIAL (1 << 26) /* i915 */ ++#define DPLLB_MODE_LVDS (2 << 26) /* i915 */ ++#define DPLL_MODE_MASK (3 << 26) ++#define DPLL_DAC_SERIAL_P2_CLOCK_DIV_10 (0 << 24) /* i915 */ ++#define DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 (1 << 24) /* i915 */ ++#define DPLLB_LVDS_P2_CLOCK_DIV_14 (0 << 24) /* i915 */ ++#define DPLLB_LVDS_P2_CLOCK_DIV_7 (1 << 24) /* i915 */ ++#define DPLL_P2_CLOCK_DIV_MASK 0x03000000 /* i915 */ ++#define DPLL_FPA01_P1_POST_DIV_MASK 0x00ff0000 /* i915 */ ++ ++#define I915_FIFO_UNDERRUN_STATUS (1UL<<31) ++#define I915_CRC_ERROR_ENABLE (1UL<<29) ++#define I915_CRC_DONE_ENABLE (1UL<<28) ++#define I915_GMBUS_EVENT_ENABLE (1UL<<27) ++#define I915_VSYNC_INTERRUPT_ENABLE (1UL<<25) ++#define I915_DISPLAY_LINE_COMPARE_ENABLE (1UL<<24) ++#define I915_DPST_EVENT_ENABLE (1UL<<23) ++#define I915_LEGACY_BLC_EVENT_ENABLE (1UL<<22) ++#define I915_ODD_FIELD_INTERRUPT_ENABLE (1UL<<21) ++#define I915_EVEN_FIELD_INTERRUPT_ENABLE (1UL<<20) ++#define I915_START_VBLANK_INTERRUPT_ENABLE (1UL<<18) /* 965 or later */ ++#define I915_VBLANK_INTERRUPT_ENABLE (1UL<<17) ++#define I915_OVERLAY_UPDATED_ENABLE (1UL<<16) ++#define I915_CRC_ERROR_INTERRUPT_STATUS (1UL<<13) ++#define I915_CRC_DONE_INTERRUPT_STATUS (1UL<<12) ++#define I915_GMBUS_INTERRUPT_STATUS (1UL<<11) ++#define I915_VSYNC_INTERRUPT_STATUS (1UL<<9) ++#define I915_DISPLAY_LINE_COMPARE_STATUS (1UL<<8) ++#define I915_DPST_EVENT_STATUS (1UL<<7) ++#define I915_LEGACY_BLC_EVENT_STATUS (1UL<<6) ++#define I915_ODD_FIELD_INTERRUPT_STATUS (1UL<<5) ++#define I915_EVEN_FIELD_INTERRUPT_STATUS (1UL<<4) ++#define I915_START_VBLANK_INTERRUPT_STATUS (1UL<<2) /* 965 or later */ ++#define I915_VBLANK_INTERRUPT_STATUS (1UL<<1) ++#define I915_OVERLAY_UPDATED_STATUS (1UL<<0) ++ ++#define SRX_INDEX 0x3c4 ++#define SRX_DATA 0x3c5 ++#define SR01 1 ++#define SR01_SCREEN_OFF (1<<5) ++ ++#define PPCR 0x61204 ++#define PPCR_ON (1<<0) ++ ++#define DVOB 0x61140 ++#define DVOB_ON (1<<31) ++#define DVOC 0x61160 ++#define DVOC_ON (1<<31) ++#define LVDS 0x61180 ++#define LVDS_ON (1<<31) ++ ++#define ADPA 0x61100 ++#define ADPA_DPMS_MASK (~(3<<10)) ++#define ADPA_DPMS_ON (0<<10) ++#define ADPA_DPMS_SUSPEND (1<<10) ++#define ADPA_DPMS_STANDBY (2<<10) ++#define ADPA_DPMS_OFF (3<<10) ++ ++#define RING_TAIL 0x00 ++#define TAIL_ADDR 0x001FFFF8 ++#define RING_HEAD 0x04 ++#define HEAD_WRAP_COUNT 0xFFE00000 ++#define HEAD_WRAP_ONE 0x00200000 ++#define HEAD_ADDR 0x001FFFFC ++#define RING_START 0x08 ++#define START_ADDR 0xFFFFF000 ++#define RING_LEN 0x0C ++#define RING_NR_PAGES 0x001FF000 ++#define RING_REPORT_MASK 0x00000006 ++#define RING_REPORT_64K 0x00000002 ++#define RING_REPORT_128K 0x00000004 ++#define RING_NO_REPORT 0x00000000 ++#define RING_VALID_MASK 0x00000001 ++#define RING_VALID 0x00000001 ++#define RING_INVALID 0x00000000 ++ ++/* Scratch pad debug 0 reg: ++ */ ++#define DPLL_FPA01_P1_POST_DIV_MASK_I830 0x001f0000 ++/* ++ * The i830 generation, in LVDS mode, defines P1 as the bit number set within ++ * this field (only one bit may be set). ++ */ ++#define DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS 0x003f0000 ++#define DPLL_FPA01_P1_POST_DIV_SHIFT 16 ++/* i830, required in DVO non-gang */ ++#define PLL_P2_DIVIDE_BY_4 (1 << 23) ++#define PLL_P1_DIVIDE_BY_TWO (1 << 21) /* i830 */ ++#define PLL_REF_INPUT_DREFCLK (0 << 13) ++#define PLL_REF_INPUT_TVCLKINA (1 << 13) /* i830 */ ++#define PLL_REF_INPUT_TVCLKINBC (2 << 13) /* SDVO TVCLKIN */ ++#define PLLB_REF_INPUT_SPREADSPECTRUMIN (3 << 13) ++#define PLL_REF_INPUT_MASK (3 << 13) ++#define PLL_LOAD_PULSE_PHASE_SHIFT 9 ++/* ++ * Parallel to Serial Load Pulse phase selection. ++ * Selects the phase for the 10X DPLL clock for the PCIe ++ * digital display port. The range is 4 to 13; 10 or more ++ * is just a flip delay. The default is 6 ++ */ ++#define PLL_LOAD_PULSE_PHASE_MASK (0xf << PLL_LOAD_PULSE_PHASE_SHIFT) ++#define DISPLAY_RATE_SELECT_FPA1 (1 << 8) ++/* ++ * SDVO multiplier for 945G/GM. Not used on 965. ++ */ ++#define SDVO_MULTIPLIER_MASK 0x000000ff ++#define SDVO_MULTIPLIER_SHIFT_HIRES 4 ++#define SDVO_MULTIPLIER_SHIFT_VGA 0 ++#define DPLL_A_MD 0x0601c /* 965+ only */ ++/* ++ * UDI pixel divider, controlling how many pixels are stuffed into a packet. ++ * ++ * Value is pixels minus 1. Must be set to 1 pixel for SDVO. ++ */ ++#define DPLL_MD_UDI_DIVIDER_MASK 0x3f000000 ++#define DPLL_MD_UDI_DIVIDER_SHIFT 24 ++/* UDI pixel divider for VGA, same as DPLL_MD_UDI_DIVIDER_MASK. */ ++#define DPLL_MD_VGA_UDI_DIVIDER_MASK 0x003f0000 ++#define DPLL_MD_VGA_UDI_DIVIDER_SHIFT 16 ++/* ++ * SDVO/UDI pixel multiplier. ++ * ++ * SDVO requires that the bus clock rate be between 1 and 2 Ghz, and the bus ++ * clock rate is 10 times the DPLL clock. At low resolution/refresh rate ++ * modes, the bus rate would be below the limits, so SDVO allows for stuffing ++ * dummy bytes in the datastream at an increased clock rate, with both sides of ++ * the link knowing how many bytes are fill. ++ * ++ * So, for a mode with a dotclock of 65Mhz, we would want to double the clock ++ * rate to 130Mhz to get a bus rate of 1.30Ghz. The DPLL clock rate would be ++ * set to 130Mhz, and the SDVO multiplier set to 2x in this register and ++ * through an SDVO command. ++ * ++ * This register field has values of multiplication factor minus 1, with ++ * a maximum multiplier of 5 for SDVO. ++ */ ++#define DPLL_MD_UDI_MULTIPLIER_MASK 0x00003f00 ++#define DPLL_MD_UDI_MULTIPLIER_SHIFT 8 ++/* ++ * SDVO/UDI pixel multiplier for VGA, same as DPLL_MD_UDI_MULTIPLIER_MASK. ++ * This best be set to the default value (3) or the CRT won't work. No, ++ * I don't entirely understand what this does... ++ */ ++#define DPLL_MD_VGA_UDI_MULTIPLIER_MASK 0x0000003f ++#define DPLL_MD_VGA_UDI_MULTIPLIER_SHIFT 0 ++#define DPLL_B_MD 0x06020 /* 965+ only */ ++#define FPA0 0x06040 ++#define FPA1 0x06044 ++#define FPB0 0x06048 ++#define FPB1 0x0604c ++#define FP_N_DIV_MASK 0x003f0000 ++#define FP_N_DIV_SHIFT 16 ++#define FP_M1_DIV_MASK 0x00003f00 ++#define FP_M1_DIV_SHIFT 8 ++#define FP_M2_DIV_MASK 0x0000003f ++#define FP_M2_DIV_SHIFT 0 ++#define DPLL_TEST 0x606c ++#define DPLLB_TEST_SDVO_DIV_1 (0 << 22) ++#define DPLLB_TEST_SDVO_DIV_2 (1 << 22) ++#define DPLLB_TEST_SDVO_DIV_4 (2 << 22) ++#define DPLLB_TEST_SDVO_DIV_MASK (3 << 22) ++#define DPLLB_TEST_N_BYPASS (1 << 19) ++#define DPLLB_TEST_M_BYPASS (1 << 18) ++#define DPLLB_INPUT_BUFFER_ENABLE (1 << 16) ++#define DPLLA_TEST_N_BYPASS (1 << 3) ++#define DPLLA_TEST_M_BYPASS (1 << 2) ++#define DPLLA_INPUT_BUFFER_ENABLE (1 << 0) ++#define D_STATE 0x6104 ++#define CG_2D_DIS 0x6200 ++#define CG_3D_DIS 0x6204 ++ ++/* ++ * Palette regs ++ */ ++ ++#define PALETTE_A 0x0a000 ++#define PALETTE_B 0x0a800 ++ ++/* ++ * Overlay regs ++ */ ++ ++#define OVADD 0x30000 ++#define DOVSTA 0x30008 ++#define OC_BUF (0x3<<20) ++#define OGAMC5 0x30010 ++#define OGAMC4 0x30014 ++#define OGAMC3 0x30018 ++#define OGAMC2 0x3001c ++#define OGAMC1 0x30020 ++#define OGAMC0 0x30024 ++ ++/* ++ * Display engine regs ++ */ ++ ++/* Pipe A timing regs */ ++#define HTOTAL_A 0x60000 ++#define HBLANK_A 0x60004 ++#define HSYNC_A 0x60008 ++#define VTOTAL_A 0x6000c ++#define VBLANK_A 0x60010 ++#define VSYNC_A 0x60014 ++#define PIPEASRC 0x6001c ++#define BCLRPAT_A 0x60020 ++ ++/* Pipe B timing regs */ ++#define HTOTAL_B 0x61000 ++#define HBLANK_B 0x61004 ++#define HSYNC_B 0x61008 ++#define VTOTAL_B 0x6100c ++#define VBLANK_B 0x61010 ++#define VSYNC_B 0x61014 ++#define PIPEBSRC 0x6101c ++#define BCLRPAT_B 0x61020 ++ ++/* VGA port control */ ++#define ADPA 0x61100 ++#define ADPA_DAC_ENABLE (1<<31) ++#define ADPA_DAC_DISABLE 0 ++#define ADPA_PIPE_SELECT_MASK (1<<30) ++#define ADPA_PIPE_A_SELECT 0 ++#define ADPA_PIPE_B_SELECT (1<<30) ++#define ADPA_USE_VGA_HVPOLARITY (1<<15) ++#define ADPA_SETS_HVPOLARITY 0 ++#define ADPA_VSYNC_CNTL_DISABLE (1<<11) ++#define ADPA_VSYNC_CNTL_ENABLE 0 ++#define ADPA_HSYNC_CNTL_DISABLE (1<<10) ++#define ADPA_HSYNC_CNTL_ENABLE 0 ++#define ADPA_VSYNC_ACTIVE_HIGH (1<<4) ++#define ADPA_VSYNC_ACTIVE_LOW 0 ++#define ADPA_HSYNC_ACTIVE_HIGH (1<<3) ++#define ADPA_HSYNC_ACTIVE_LOW 0 ++#define ADPA_DPMS_MASK (~(3<<10)) ++#define ADPA_DPMS_ON (0<<10) ++#define ADPA_DPMS_SUSPEND (1<<10) ++#define ADPA_DPMS_STANDBY (2<<10) ++#define ADPA_DPMS_OFF (3<<10) ++ ++/* Hotplug control (945+ only) */ ++#define PORT_HOTPLUG_EN 0x61110 ++#define SDVOB_HOTPLUG_INT_EN (1 << 26) ++#define SDVOC_HOTPLUG_INT_EN (1 << 25) ++#define TV_HOTPLUG_INT_EN (1 << 18) ++#define CRT_HOTPLUG_INT_EN (1 << 9) ++#define CRT_HOTPLUG_FORCE_DETECT (1 << 3) ++ ++#define PORT_HOTPLUG_STAT 0x61114 ++#define CRT_HOTPLUG_INT_STATUS (1 << 11) ++#define TV_HOTPLUG_INT_STATUS (1 << 10) ++#define CRT_HOTPLUG_MONITOR_MASK (3 << 8) ++#define CRT_HOTPLUG_MONITOR_COLOR (3 << 8) ++#define CRT_HOTPLUG_MONITOR_MONO (2 << 8) ++#define CRT_HOTPLUG_MONITOR_NONE (0 << 8) ++#define SDVOC_HOTPLUG_INT_STATUS (1 << 7) ++#define SDVOB_HOTPLUG_INT_STATUS (1 << 6) ++ ++/* SDVO port control */ ++#define SDVOB 0x61140 ++#define SDVOC 0x61160 ++#define SDVO_ENABLE (1 << 31) ++#define SDVO_PIPE_B_SELECT (1 << 30) ++#define SDVO_STALL_SELECT (1 << 29) ++#define SDVO_INTERRUPT_ENABLE (1 << 26) ++/** ++ * 915G/GM SDVO pixel multiplier. ++ * ++ * Programmed value is multiplier - 1, up to 5x. ++ * ++ * \sa DPLL_MD_UDI_MULTIPLIER_MASK ++ */ ++#define SDVO_PORT_MULTIPLY_MASK (7 << 23) ++#define SDVO_PORT_MULTIPLY_SHIFT 23 ++#define SDVO_PHASE_SELECT_MASK (15 << 19) ++#define SDVO_PHASE_SELECT_DEFAULT (6 << 19) ++#define SDVO_CLOCK_OUTPUT_INVERT (1 << 18) ++#define SDVOC_GANG_MODE (1 << 16) ++#define SDVO_BORDER_ENABLE (1 << 7) ++#define SDVOB_PCIE_CONCURRENCY (1 << 3) ++#define SDVO_DETECTED (1 << 2) ++/* Bits to be preserved when writing */ ++#define SDVOB_PRESERVE_MASK ((1 << 17) | (1 << 16) | (1 << 14) | (1 << 26)) ++#define SDVOC_PRESERVE_MASK ((1 << 17) | (1 << 26)) ++ ++/* DVO port control */ ++#define DVOA 0x61120 ++#define DVOB 0x61140 ++#define DVOC 0x61160 ++#define DVO_ENABLE (1 << 31) ++#define DVO_PIPE_B_SELECT (1 << 30) ++#define DVO_PIPE_STALL_UNUSED (0 << 28) ++#define DVO_PIPE_STALL (1 << 28) ++#define DVO_PIPE_STALL_TV (2 << 28) ++#define DVO_PIPE_STALL_MASK (3 << 28) ++#define DVO_USE_VGA_SYNC (1 << 15) ++#define DVO_DATA_ORDER_I740 (0 << 14) ++#define DVO_DATA_ORDER_FP (1 << 14) ++#define DVO_VSYNC_DISABLE (1 << 11) ++#define DVO_HSYNC_DISABLE (1 << 10) ++#define DVO_VSYNC_TRISTATE (1 << 9) ++#define DVO_HSYNC_TRISTATE (1 << 8) ++#define DVO_BORDER_ENABLE (1 << 7) ++#define DVO_DATA_ORDER_GBRG (1 << 6) ++#define DVO_DATA_ORDER_RGGB (0 << 6) ++#define DVO_DATA_ORDER_GBRG_ERRATA (0 << 6) ++#define DVO_DATA_ORDER_RGGB_ERRATA (1 << 6) ++#define DVO_VSYNC_ACTIVE_HIGH (1 << 4) ++#define DVO_HSYNC_ACTIVE_HIGH (1 << 3) ++#define DVO_BLANK_ACTIVE_HIGH (1 << 2) ++#define DVO_OUTPUT_CSTATE_PIXELS (1 << 1) /* SDG only */ ++#define DVO_OUTPUT_SOURCE_SIZE_PIXELS (1 << 0) /* SDG only */ ++#define DVO_PRESERVE_MASK (0x7<<24) ++#define DVOA_SRCDIM 0x61124 ++#define DVOB_SRCDIM 0x61144 ++#define DVOC_SRCDIM 0x61164 ++#define DVO_SRCDIM_HORIZONTAL_SHIFT 12 ++#define DVO_SRCDIM_VERTICAL_SHIFT 0 ++ ++/* LVDS port control */ ++#define LVDS 0x61180 ++/* ++ * Enables the LVDS port. This bit must be set before DPLLs are enabled, as ++ * the DPLL semantics change when the LVDS is assigned to that pipe. ++ */ ++#define LVDS_PORT_EN (1 << 31) ++/* Selects pipe B for LVDS data. Must be set on pre-965. */ ++#define LVDS_PIPEB_SELECT (1 << 30) ++/* ++ * Enables the A0-A2 data pairs and CLKA, containing 18 bits of color data per ++ * pixel. ++ */ ++#define LVDS_A0A2_CLKA_POWER_MASK (3 << 8) ++#define LVDS_A0A2_CLKA_POWER_DOWN (0 << 8) ++#define LVDS_A0A2_CLKA_POWER_UP (3 << 8) ++/* ++ * Controls the A3 data pair, which contains the additional LSBs for 24 bit ++ * mode. Only enabled if LVDS_A0A2_CLKA_POWER_UP also indicates it should be ++ * on. ++ */ ++#define LVDS_A3_POWER_MASK (3 << 6) ++#define LVDS_A3_POWER_DOWN (0 << 6) ++#define LVDS_A3_POWER_UP (3 << 6) ++/* ++ * Controls the CLKB pair. This should only be set when LVDS_B0B3_POWER_UP ++ * is set. ++ */ ++#define LVDS_CLKB_POWER_MASK (3 << 4) ++#define LVDS_CLKB_POWER_DOWN (0 << 4) ++#define LVDS_CLKB_POWER_UP (3 << 4) ++/* ++ * Controls the B0-B3 data pairs. This must be set to match the DPLL p2 ++ * setting for whether we are in dual-channel mode. The B3 pair will ++ * additionally only be powered up when LVDS_A3_POWER_UP is set. ++ */ ++#define LVDS_B0B3_POWER_MASK (3 << 2) ++#define LVDS_B0B3_POWER_DOWN (0 << 2) ++#define LVDS_B0B3_POWER_UP (3 << 2) ++ ++/* Panel power sequencing */ ++#define PP_STATUS 0x61200 ++#define PP_ON (1 << 31) ++/* ++ * Indicates that all dependencies of the panel are on: ++ * ++ * - PLL enabled ++ * - pipe enabled ++ * - LVDS/DVOB/DVOC on ++ */ ++#define PP_READY (1 << 30) ++#define PP_SEQUENCE_NONE (0 << 28) ++#define PP_SEQUENCE_ON (1 << 28) ++#define PP_SEQUENCE_OFF (2 << 28) ++#define PP_SEQUENCE_MASK 0x30000000 ++#define PP_CONTROL 0x61204 ++#define POWER_TARGET_ON (1 << 0) ++#define PP_ON_DELAYS 0x61208 ++#define PP_OFF_DELAYS 0x6120c ++#define PP_DIVISOR 0x61210 ++ ++/* Panel fitting */ ++#define PFIT_CONTROL 0x61230 ++#define PFIT_ENABLE (1 << 31) ++#define PFIT_PIPE_MASK (3 << 29) ++#define PFIT_PIPE_SHIFT 29 ++#define VERT_INTERP_DISABLE (0 << 10) ++#define VERT_INTERP_BILINEAR (1 << 10) ++#define VERT_INTERP_MASK (3 << 10) ++#define VERT_AUTO_SCALE (1 << 9) ++#define HORIZ_INTERP_DISABLE (0 << 6) ++#define HORIZ_INTERP_BILINEAR (1 << 6) ++#define HORIZ_INTERP_MASK (3 << 6) ++#define HORIZ_AUTO_SCALE (1 << 5) ++#define PANEL_8TO6_DITHER_ENABLE (1 << 3) ++#define PFIT_PGM_RATIOS 0x61234 ++#define PFIT_VERT_SCALE_MASK 0xfff00000 ++#define PFIT_HORIZ_SCALE_MASK 0x0000fff0 ++#define PFIT_AUTO_RATIOS 0x61238 ++ ++/* Backlight control */ ++#define BLC_PWM_CTL 0x61254 ++#define BACKLIGHT_MODULATION_FREQ_SHIFT (17) ++#define BLC_PWM_CTL2 0x61250 /* 965+ only */ ++/* ++ * This is the most significant 15 bits of the number of backlight cycles in a ++ * complete cycle of the modulated backlight control. ++ * ++ * The actual value is this field multiplied by two. ++ */ ++#define BACKLIGHT_MODULATION_FREQ_MASK (0x7fff << 17) ++#define BLM_LEGACY_MODE (1 << 16) ++/* ++ * This is the number of cycles out of the backlight modulation cycle for which ++ * the backlight is on. ++ * ++ * This field must be no greater than the number of cycles in the complete ++ * backlight modulation cycle. ++ */ ++#define BACKLIGHT_DUTY_CYCLE_SHIFT (0) ++#define BACKLIGHT_DUTY_CYCLE_MASK (0xffff) ++ ++/* TV port control */ ++#define TV_CTL 0x68000 ++/** Enables the TV encoder */ ++# define TV_ENC_ENABLE (1 << 31) ++/** Sources the TV encoder input from pipe B instead of A. */ ++# define TV_ENC_PIPEB_SELECT (1 << 30) ++/** Outputs composite video (DAC A only) */ ++# define TV_ENC_OUTPUT_COMPOSITE (0 << 28) ++/** Outputs SVideo video (DAC B/C) */ ++# define TV_ENC_OUTPUT_SVIDEO (1 << 28) ++/** Outputs Component video (DAC A/B/C) */ ++# define TV_ENC_OUTPUT_COMPONENT (2 << 28) ++/** Outputs Composite and SVideo (DAC A/B/C) */ ++# define TV_ENC_OUTPUT_SVIDEO_COMPOSITE (3 << 28) ++# define TV_TRILEVEL_SYNC (1 << 21) ++/** Enables slow sync generation (945GM only) */ ++# define TV_SLOW_SYNC (1 << 20) ++/** Selects 4x oversampling for 480i and 576p */ ++# define TV_OVERSAMPLE_4X (0 << 18) ++/** Selects 2x oversampling for 720p and 1080i */ ++# define TV_OVERSAMPLE_2X (1 << 18) ++/** Selects no oversampling for 1080p */ ++# define TV_OVERSAMPLE_NONE (2 << 18) ++/** Selects 8x oversampling */ ++# define TV_OVERSAMPLE_8X (3 << 18) ++/** Selects progressive mode rather than interlaced */ ++# define TV_PROGRESSIVE (1 << 17) ++/** Sets the colorburst to PAL mode. Required for non-M PAL modes. */ ++# define TV_PAL_BURST (1 << 16) ++/** Field for setting delay of Y compared to C */ ++# define TV_YC_SKEW_MASK (7 << 12) ++/** Enables a fix for 480p/576p standard definition modes on the 915GM only */ ++# define TV_ENC_SDP_FIX (1 << 11) ++/** ++ * Enables a fix for the 915GM only. ++ * ++ * Not sure what it does. ++ */ ++# define TV_ENC_C0_FIX (1 << 10) ++/** Bits that must be preserved by software */ ++# define TV_CTL_SAVE ((3 << 8) | (3 << 6)) ++# define TV_FUSE_STATE_MASK (3 << 4) ++/** Read-only state that reports all features enabled */ ++# define TV_FUSE_STATE_ENABLED (0 << 4) ++/** Read-only state that reports that Macrovision is disabled in hardware*/ ++# define TV_FUSE_STATE_NO_MACROVISION (1 << 4) ++/** Read-only state that reports that TV-out is disabled in hardware. */ ++# define TV_FUSE_STATE_DISABLED (2 << 4) ++/** Normal operation */ ++# define TV_TEST_MODE_NORMAL (0 << 0) ++/** Encoder test pattern 1 - combo pattern */ ++# define TV_TEST_MODE_PATTERN_1 (1 << 0) ++/** Encoder test pattern 2 - full screen vertical 75% color bars */ ++# define TV_TEST_MODE_PATTERN_2 (2 << 0) ++/** Encoder test pattern 3 - full screen horizontal 75% color bars */ ++# define TV_TEST_MODE_PATTERN_3 (3 << 0) ++/** Encoder test pattern 4 - random noise */ ++# define TV_TEST_MODE_PATTERN_4 (4 << 0) ++/** Encoder test pattern 5 - linear color ramps */ ++# define TV_TEST_MODE_PATTERN_5 (5 << 0) ++/** ++ * This test mode forces the DACs to 50% of full output. ++ * ++ * This is used for load detection in combination with TVDAC_SENSE_MASK ++ */ ++# define TV_TEST_MODE_MONITOR_DETECT (7 << 0) ++# define TV_TEST_MODE_MASK (7 << 0) ++ ++#define TV_DAC 0x68004 ++/** ++ * Reports that DAC state change logic has reported change (RO). ++ * ++ * This gets cleared when TV_DAC_STATE_EN is cleared ++*/ ++# define TVDAC_STATE_CHG (1 << 31) ++# define TVDAC_SENSE_MASK (7 << 28) ++/** Reports that DAC A voltage is above the detect threshold */ ++# define TVDAC_A_SENSE (1 << 30) ++/** Reports that DAC B voltage is above the detect threshold */ ++# define TVDAC_B_SENSE (1 << 29) ++/** Reports that DAC C voltage is above the detect threshold */ ++# define TVDAC_C_SENSE (1 << 28) ++/** ++ * Enables DAC state detection logic, for load-based TV detection. ++ * ++ * The PLL of the chosen pipe (in TV_CTL) must be running, and the encoder set ++ * to off, for load detection to work. ++ */ ++# define TVDAC_STATE_CHG_EN (1 << 27) ++/** Sets the DAC A sense value to high */ ++# define TVDAC_A_SENSE_CTL (1 << 26) ++/** Sets the DAC B sense value to high */ ++# define TVDAC_B_SENSE_CTL (1 << 25) ++/** Sets the DAC C sense value to high */ ++# define TVDAC_C_SENSE_CTL (1 << 24) ++/** Overrides the ENC_ENABLE and DAC voltage levels */ ++# define DAC_CTL_OVERRIDE (1 << 7) ++/** Sets the slew rate. Must be preserved in software */ ++# define ENC_TVDAC_SLEW_FAST (1 << 6) ++# define DAC_A_1_3_V (0 << 4) ++# define DAC_A_1_1_V (1 << 4) ++# define DAC_A_0_7_V (2 << 4) ++# define DAC_A_OFF (3 << 4) ++# define DAC_B_1_3_V (0 << 2) ++# define DAC_B_1_1_V (1 << 2) ++# define DAC_B_0_7_V (2 << 2) ++# define DAC_B_OFF (3 << 2) ++# define DAC_C_1_3_V (0 << 0) ++# define DAC_C_1_1_V (1 << 0) ++# define DAC_C_0_7_V (2 << 0) ++# define DAC_C_OFF (3 << 0) ++ ++/** ++ * CSC coefficients are stored in a floating point format with 9 bits of ++ * mantissa and 2 or 3 bits of exponent. The exponent is represented as 2**-n, ++ * where 2-bit exponents are unsigned n, and 3-bit exponents are signed n with ++ * -1 (0x3) being the only legal negative value. ++ */ ++#define TV_CSC_Y 0x68010 ++# define TV_RY_MASK 0x07ff0000 ++# define TV_RY_SHIFT 16 ++# define TV_GY_MASK 0x00000fff ++# define TV_GY_SHIFT 0 ++ ++#define TV_CSC_Y2 0x68014 ++# define TV_BY_MASK 0x07ff0000 ++# define TV_BY_SHIFT 16 ++/** ++ * Y attenuation for component video. ++ * ++ * Stored in 1.9 fixed point. ++ */ ++# define TV_AY_MASK 0x000003ff ++# define TV_AY_SHIFT 0 ++ ++#define TV_CSC_U 0x68018 ++# define TV_RU_MASK 0x07ff0000 ++# define TV_RU_SHIFT 16 ++# define TV_GU_MASK 0x000007ff ++# define TV_GU_SHIFT 0 ++ ++#define TV_CSC_U2 0x6801c ++# define TV_BU_MASK 0x07ff0000 ++# define TV_BU_SHIFT 16 ++/** ++ * U attenuation for component video. ++ * ++ * Stored in 1.9 fixed point. ++ */ ++# define TV_AU_MASK 0x000003ff ++# define TV_AU_SHIFT 0 ++ ++#define TV_CSC_V 0x68020 ++# define TV_RV_MASK 0x0fff0000 ++# define TV_RV_SHIFT 16 ++# define TV_GV_MASK 0x000007ff ++# define TV_GV_SHIFT 0 ++ ++#define TV_CSC_V2 0x68024 ++# define TV_BV_MASK 0x07ff0000 ++# define TV_BV_SHIFT 16 ++/** ++ * V attenuation for component video. ++ * ++ * Stored in 1.9 fixed point. ++ */ ++# define TV_AV_MASK 0x000007ff ++# define TV_AV_SHIFT 0 ++ ++#define TV_CLR_KNOBS 0x68028 ++/** 2s-complement brightness adjustment */ ++# define TV_BRIGHTNESS_MASK 0xff000000 ++# define TV_BRIGHTNESS_SHIFT 24 ++/** Contrast adjustment, as a 2.6 unsigned floating point number */ ++# define TV_CONTRAST_MASK 0x00ff0000 ++# define TV_CONTRAST_SHIFT 16 ++/** Saturation adjustment, as a 2.6 unsigned floating point number */ ++# define TV_SATURATION_MASK 0x0000ff00 ++# define TV_SATURATION_SHIFT 8 ++/** Hue adjustment, as an integer phase angle in degrees */ ++# define TV_HUE_MASK 0x000000ff ++# define TV_HUE_SHIFT 0 ++ ++#define TV_CLR_LEVEL 0x6802c ++/** Controls the DAC level for black */ ++# define TV_BLACK_LEVEL_MASK 0x01ff0000 ++# define TV_BLACK_LEVEL_SHIFT 16 ++/** Controls the DAC level for blanking */ ++# define TV_BLANK_LEVEL_MASK 0x000001ff ++# define TV_BLANK_LEVEL_SHIFT 0 ++ ++#define TV_H_CTL_1 0x68030 ++/** Number of pixels in the hsync. */ ++# define TV_HSYNC_END_MASK 0x1fff0000 ++# define TV_HSYNC_END_SHIFT 16 ++/** Total number of pixels minus one in the line (display and blanking). */ ++# define TV_HTOTAL_MASK 0x00001fff ++# define TV_HTOTAL_SHIFT 0 ++ ++#define TV_H_CTL_2 0x68034 ++/** Enables the colorburst (needed for non-component color) */ ++# define TV_BURST_ENA (1 << 31) ++/** Offset of the colorburst from the start of hsync, in pixels minus one. */ ++# define TV_HBURST_START_SHIFT 16 ++# define TV_HBURST_START_MASK 0x1fff0000 ++/** Length of the colorburst */ ++# define TV_HBURST_LEN_SHIFT 0 ++# define TV_HBURST_LEN_MASK 0x0001fff ++ ++#define TV_H_CTL_3 0x68038 ++/** End of hblank, measured in pixels minus one from start of hsync */ ++# define TV_HBLANK_END_SHIFT 16 ++# define TV_HBLANK_END_MASK 0x1fff0000 ++/** Start of hblank, measured in pixels minus one from start of hsync */ ++# define TV_HBLANK_START_SHIFT 0 ++# define TV_HBLANK_START_MASK 0x0001fff ++ ++#define TV_V_CTL_1 0x6803c ++/** XXX */ ++# define TV_NBR_END_SHIFT 16 ++# define TV_NBR_END_MASK 0x07ff0000 ++/** XXX */ ++# define TV_VI_END_F1_SHIFT 8 ++# define TV_VI_END_F1_MASK 0x00003f00 ++/** XXX */ ++# define TV_VI_END_F2_SHIFT 0 ++# define TV_VI_END_F2_MASK 0x0000003f ++ ++#define TV_V_CTL_2 0x68040 ++/** Length of vsync, in half lines */ ++# define TV_VSYNC_LEN_MASK 0x07ff0000 ++# define TV_VSYNC_LEN_SHIFT 16 ++/** Offset of the start of vsync in field 1, measured in one less than the ++ * number of half lines. ++ */ ++# define TV_VSYNC_START_F1_MASK 0x00007f00 ++# define TV_VSYNC_START_F1_SHIFT 8 ++/** ++ * Offset of the start of vsync in field 2, measured in one less than the ++ * number of half lines. ++ */ ++# define TV_VSYNC_START_F2_MASK 0x0000007f ++# define TV_VSYNC_START_F2_SHIFT 0 ++ ++#define TV_V_CTL_3 0x68044 ++/** Enables generation of the equalization signal */ ++# define TV_EQUAL_ENA (1 << 31) ++/** Length of vsync, in half lines */ ++# define TV_VEQ_LEN_MASK 0x007f0000 ++# define TV_VEQ_LEN_SHIFT 16 ++/** Offset of the start of equalization in field 1, measured in one less than ++ * the number of half lines. ++ */ ++# define TV_VEQ_START_F1_MASK 0x0007f00 ++# define TV_VEQ_START_F1_SHIFT 8 ++/** ++ * Offset of the start of equalization in field 2, measured in one less than ++ * the number of half lines. ++ */ ++# define TV_VEQ_START_F2_MASK 0x000007f ++# define TV_VEQ_START_F2_SHIFT 0 ++ ++#define TV_V_CTL_4 0x68048 ++/** ++ * Offset to start of vertical colorburst, measured in one less than the ++ * number of lines from vertical start. ++ */ ++# define TV_VBURST_START_F1_MASK 0x003f0000 ++# define TV_VBURST_START_F1_SHIFT 16 ++/** ++ * Offset to the end of vertical colorburst, measured in one less than the ++ * number of lines from the start of NBR. ++ */ ++# define TV_VBURST_END_F1_MASK 0x000000ff ++# define TV_VBURST_END_F1_SHIFT 0 ++ ++#define TV_V_CTL_5 0x6804c ++/** ++ * Offset to start of vertical colorburst, measured in one less than the ++ * number of lines from vertical start. ++ */ ++# define TV_VBURST_START_F2_MASK 0x003f0000 ++# define TV_VBURST_START_F2_SHIFT 16 ++/** ++ * Offset to the end of vertical colorburst, measured in one less than the ++ * number of lines from the start of NBR. ++ */ ++# define TV_VBURST_END_F2_MASK 0x000000ff ++# define TV_VBURST_END_F2_SHIFT 0 ++ ++#define TV_V_CTL_6 0x68050 ++/** ++ * Offset to start of vertical colorburst, measured in one less than the ++ * number of lines from vertical start. ++ */ ++# define TV_VBURST_START_F3_MASK 0x003f0000 ++# define TV_VBURST_START_F3_SHIFT 16 ++/** ++ * Offset to the end of vertical colorburst, measured in one less than the ++ * number of lines from the start of NBR. ++ */ ++# define TV_VBURST_END_F3_MASK 0x000000ff ++# define TV_VBURST_END_F3_SHIFT 0 ++ ++#define TV_V_CTL_7 0x68054 ++/** ++ * Offset to start of vertical colorburst, measured in one less than the ++ * number of lines from vertical start. ++ */ ++# define TV_VBURST_START_F4_MASK 0x003f0000 ++# define TV_VBURST_START_F4_SHIFT 16 ++/** ++ * Offset to the end of vertical colorburst, measured in one less than the ++ * number of lines from the start of NBR. ++ */ ++# define TV_VBURST_END_F4_MASK 0x000000ff ++# define TV_VBURST_END_F4_SHIFT 0 ++ ++#define TV_SC_CTL_1 0x68060 ++/** Turns on the first subcarrier phase generation DDA */ ++# define TV_SC_DDA1_EN (1 << 31) ++/** Turns on the first subcarrier phase generation DDA */ ++# define TV_SC_DDA2_EN (1 << 30) ++/** Turns on the first subcarrier phase generation DDA */ ++# define TV_SC_DDA3_EN (1 << 29) ++/** Sets the subcarrier DDA to reset frequency every other field */ ++# define TV_SC_RESET_EVERY_2 (0 << 24) ++/** Sets the subcarrier DDA to reset frequency every fourth field */ ++# define TV_SC_RESET_EVERY_4 (1 << 24) ++/** Sets the subcarrier DDA to reset frequency every eighth field */ ++# define TV_SC_RESET_EVERY_8 (2 << 24) ++/** Sets the subcarrier DDA to never reset the frequency */ ++# define TV_SC_RESET_NEVER (3 << 24) ++/** Sets the peak amplitude of the colorburst.*/ ++# define TV_BURST_LEVEL_MASK 0x00ff0000 ++# define TV_BURST_LEVEL_SHIFT 16 ++/** Sets the increment of the first subcarrier phase generation DDA */ ++# define TV_SCDDA1_INC_MASK 0x00000fff ++# define TV_SCDDA1_INC_SHIFT 0 ++ ++#define TV_SC_CTL_2 0x68064 ++/** Sets the rollover for the second subcarrier phase generation DDA */ ++# define TV_SCDDA2_SIZE_MASK 0x7fff0000 ++# define TV_SCDDA2_SIZE_SHIFT 16 ++/** Sets the increent of the second subcarrier phase generation DDA */ ++# define TV_SCDDA2_INC_MASK 0x00007fff ++# define TV_SCDDA2_INC_SHIFT 0 ++ ++#define TV_SC_CTL_3 0x68068 ++/** Sets the rollover for the third subcarrier phase generation DDA */ ++# define TV_SCDDA3_SIZE_MASK 0x7fff0000 ++# define TV_SCDDA3_SIZE_SHIFT 16 ++/** Sets the increent of the third subcarrier phase generation DDA */ ++# define TV_SCDDA3_INC_MASK 0x00007fff ++# define TV_SCDDA3_INC_SHIFT 0 ++ ++#define TV_WIN_POS 0x68070 ++/** X coordinate of the display from the start of horizontal active */ ++# define TV_XPOS_MASK 0x1fff0000 ++# define TV_XPOS_SHIFT 16 ++/** Y coordinate of the display from the start of vertical active (NBR) */ ++# define TV_YPOS_MASK 0x00000fff ++# define TV_YPOS_SHIFT 0 ++ ++#define TV_WIN_SIZE 0x68074 ++/** Horizontal size of the display window, measured in pixels*/ ++# define TV_XSIZE_MASK 0x1fff0000 ++# define TV_XSIZE_SHIFT 16 ++/** ++ * Vertical size of the display window, measured in pixels. ++ * ++ * Must be even for interlaced modes. ++ */ ++# define TV_YSIZE_MASK 0x00000fff ++# define TV_YSIZE_SHIFT 0 ++ ++#define TV_FILTER_CTL_1 0x68080 ++/** ++ * Enables automatic scaling calculation. ++ * ++ * If set, the rest of the registers are ignored, and the calculated values can ++ * be read back from the register. ++ */ ++# define TV_AUTO_SCALE (1 << 31) ++/** ++ * Disables the vertical filter. ++ * ++ * This is required on modes more than 1024 pixels wide */ ++# define TV_V_FILTER_BYPASS (1 << 29) ++/** Enables adaptive vertical filtering */ ++# define TV_VADAPT (1 << 28) ++# define TV_VADAPT_MODE_MASK (3 << 26) ++/** Selects the least adaptive vertical filtering mode */ ++# define TV_VADAPT_MODE_LEAST (0 << 26) ++/** Selects the moderately adaptive vertical filtering mode */ ++# define TV_VADAPT_MODE_MODERATE (1 << 26) ++/** Selects the most adaptive vertical filtering mode */ ++# define TV_VADAPT_MODE_MOST (3 << 26) ++/** ++ * Sets the horizontal scaling factor. ++ * ++ * This should be the fractional part of the horizontal scaling factor divided ++ * by the oversampling rate. TV_HSCALE should be less than 1, and set to: ++ * ++ * (src width - 1) / ((oversample * dest width) - 1) ++ */ ++# define TV_HSCALE_FRAC_MASK 0x00003fff ++# define TV_HSCALE_FRAC_SHIFT 0 ++ ++#define TV_FILTER_CTL_2 0x68084 ++/** ++ * Sets the integer part of the 3.15 fixed-point vertical scaling factor. ++ * ++ * TV_VSCALE should be (src height - 1) / ((interlace * dest height) - 1) ++ */ ++# define TV_VSCALE_INT_MASK 0x00038000 ++# define TV_VSCALE_INT_SHIFT 15 ++/** ++ * Sets the fractional part of the 3.15 fixed-point vertical scaling factor. ++ * ++ * \sa TV_VSCALE_INT_MASK ++ */ ++# define TV_VSCALE_FRAC_MASK 0x00007fff ++# define TV_VSCALE_FRAC_SHIFT 0 ++ ++#define TV_FILTER_CTL_3 0x68088 ++/** ++ * Sets the integer part of the 3.15 fixed-point vertical scaling factor. ++ * ++ * TV_VSCALE should be (src height - 1) / (1/4 * (dest height - 1)) ++ * ++ * For progressive modes, TV_VSCALE_IP_INT should be set to zeroes. ++ */ ++# define TV_VSCALE_IP_INT_MASK 0x00038000 ++# define TV_VSCALE_IP_INT_SHIFT 15 ++/** ++ * Sets the fractional part of the 3.15 fixed-point vertical scaling factor. ++ * ++ * For progressive modes, TV_VSCALE_IP_INT should be set to zeroes. ++ * ++ * \sa TV_VSCALE_IP_INT_MASK ++ */ ++# define TV_VSCALE_IP_FRAC_MASK 0x00007fff ++# define TV_VSCALE_IP_FRAC_SHIFT 0 ++ ++#define TV_CC_CONTROL 0x68090 ++# define TV_CC_ENABLE (1 << 31) ++/** ++ * Specifies which field to send the CC data in. ++ * ++ * CC data is usually sent in field 0. ++ */ ++# define TV_CC_FID_MASK (1 << 27) ++# define TV_CC_FID_SHIFT 27 ++/** Sets the horizontal position of the CC data. Usually 135. */ ++# define TV_CC_HOFF_MASK 0x03ff0000 ++# define TV_CC_HOFF_SHIFT 16 ++/** Sets the vertical position of the CC data. Usually 21 */ ++# define TV_CC_LINE_MASK 0x0000003f ++# define TV_CC_LINE_SHIFT 0 ++ ++#define TV_CC_DATA 0x68094 ++# define TV_CC_RDY (1 << 31) ++/** Second word of CC data to be transmitted. */ ++# define TV_CC_DATA_2_MASK 0x007f0000 ++# define TV_CC_DATA_2_SHIFT 16 ++/** First word of CC data to be transmitted. */ ++# define TV_CC_DATA_1_MASK 0x0000007f ++# define TV_CC_DATA_1_SHIFT 0 ++ ++#define TV_H_LUMA_0 0x68100 ++#define TV_H_LUMA_59 0x681ec ++#define TV_H_CHROMA_0 0x68200 ++#define TV_H_CHROMA_59 0x682ec ++#define TV_V_LUMA_0 0x68300 ++#define TV_V_LUMA_42 0x683a8 ++#define TV_V_CHROMA_0 0x68400 ++#define TV_V_CHROMA_42 0x684a8 ++ ++/* Display & cursor control */ ++ ++/* Pipe A */ ++#define PIPEADSL 0x70000 ++#define PIPEACONF 0x70008 ++#define PIPEACONF_ENABLE (1<<31) ++#define PIPEACONF_DISABLE 0 ++#define PIPEACONF_DOUBLE_WIDE (1<<30) ++#define I965_PIPECONF_ACTIVE (1<<30) ++#define PIPEACONF_SINGLE_WIDE 0 ++#define PIPEACONF_PIPE_UNLOCKED 0 ++#define PIPEACONF_PIPE_LOCKED (1<<25) ++#define PIPEACONF_PALETTE 0 ++#define PIPEACONF_GAMMA (1<<24) ++#define PIPECONF_FORCE_BORDER (1<<25) ++#define PIPECONF_PROGRESSIVE (0 << 21) ++#define PIPECONF_INTERLACE_W_FIELD_INDICATION (6 << 21) ++#define PIPECONF_INTERLACE_FIELD_0_ONLY (7 << 21) ++#define PIPEASTAT 0x70024 ++#define PIPE_FIFO_UNDERRUN_STATUS (1UL<<31) ++#define PIPE_CRC_ERROR_ENABLE (1UL<<29) ++#define PIPE_CRC_DONE_ENABLE (1UL<<28) ++#define PIPE_GMBUS_EVENT_ENABLE (1UL<<27) ++#define PIPE_HOTPLUG_INTERRUPT_ENABLE (1UL<<26) ++#define PIPE_VSYNC_INTERRUPT_ENABLE (1UL<<25) ++#define PIPE_DISPLAY_LINE_COMPARE_ENABLE (1UL<<24) ++#define PIPE_DPST_EVENT_ENABLE (1UL<<23) ++#define PIPE_LEGACY_BLC_EVENT_ENABLE (1UL<<22) ++#define PIPE_ODD_FIELD_INTERRUPT_ENABLE (1UL<<21) ++#define PIPE_EVEN_FIELD_INTERRUPT_ENABLE (1UL<<20) ++#define PIPE_HOTPLUG_TV_INTERRUPT_ENABLE (1UL<<18) /* pre-965 */ ++#define PIPE_START_VBLANK_INTERRUPT_ENABLE (1UL<<18) /* 965 or later */ ++#define PIPE_VBLANK_INTERRUPT_ENABLE (1UL<<17) ++#define PIPE_OVERLAY_UPDATED_ENABLE (1UL<<16) ++#define PIPE_CRC_ERROR_INTERRUPT_STATUS (1UL<<13) ++#define PIPE_CRC_DONE_INTERRUPT_STATUS (1UL<<12) ++#define PIPE_GMBUS_INTERRUPT_STATUS (1UL<<11) ++#define PIPE_HOTPLUG_INTERRUPT_STATUS (1UL<<10) ++#define PIPE_VSYNC_INTERRUPT_STATUS (1UL<<9) ++#define PIPE_DISPLAY_LINE_COMPARE_STATUS (1UL<<8) ++#define PIPE_DPST_EVENT_STATUS (1UL<<7) ++#define PIPE_LEGACY_BLC_EVENT_STATUS (1UL<<6) ++#define PIPE_ODD_FIELD_INTERRUPT_STATUS (1UL<<5) ++#define PIPE_EVEN_FIELD_INTERRUPT_STATUS (1UL<<4) ++#define PIPE_HOTPLUG_TV_INTERRUPT_STATUS (1UL<<2) /* pre-965 */ ++#define PIPE_START_VBLANK_INTERRUPT_STATUS (1UL<<2) /* 965 or later */ ++#define PIPE_VBLANK_INTERRUPT_STATUS (1UL<<1) ++#define PIPE_OVERLAY_UPDATED_STATUS (1UL<<0) ++ ++#define DSPARB 0x70030 ++#define DSPARB_CSTART_MASK (0x7f << 7) ++#define DSPARB_CSTART_SHIFT 7 ++#define DSPARB_BSTART_MASK (0x7f) ++#define DSPARB_BSTART_SHIFT 0 ++/* ++ * The two pipe frame counter registers are not synchronized, so ++ * reading a stable value is somewhat tricky. The following code ++ * should work: ++ * ++ * do { ++ * high1 = ((INREG(PIPEAFRAMEHIGH) & PIPE_FRAME_HIGH_MASK) >> ++ * PIPE_FRAME_HIGH_SHIFT; ++ * low1 = ((INREG(PIPEAFRAMEPIXEL) & PIPE_FRAME_LOW_MASK) >> ++ * PIPE_FRAME_LOW_SHIFT); ++ * high2 = ((INREG(PIPEAFRAMEHIGH) & PIPE_FRAME_HIGH_MASK) >> ++ * PIPE_FRAME_HIGH_SHIFT); ++ * } while (high1 != high2); ++ * frame = (high1 << 8) | low1; ++ */ ++#define PIPEAFRAMEHIGH 0x70040 ++#define PIPE_FRAME_HIGH_MASK 0x0000ffff ++#define PIPE_FRAME_HIGH_SHIFT 0 ++#define PIPEAFRAMEPIXEL 0x70044 ++#define PIPE_FRAME_LOW_MASK 0xff000000 ++#define PIPE_FRAME_LOW_SHIFT 24 ++#define PIPE_PIXEL_MASK 0x00ffffff ++#define PIPE_PIXEL_SHIFT 0 ++ ++/* Cursor A & B regs */ ++#define CURACNTR 0x70080 ++#define CURSOR_MODE_DISABLE 0x00 ++#define CURSOR_MODE_64_32B_AX 0x07 ++#define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) ++#define MCURSOR_GAMMA_ENABLE (1 << 26) ++#define CURABASE 0x70084 ++#define CURAPOS 0x70088 ++#define CURSOR_POS_MASK 0x007FF ++#define CURSOR_POS_SIGN 0x8000 ++#define CURSOR_X_SHIFT 0 ++#define CURSOR_Y_SHIFT 16 ++#define CURBCNTR 0x700c0 ++#define CURBBASE 0x700c4 ++#define CURBPOS 0x700c8 ++ ++/* Display A control */ ++#define DSPACNTR 0x70180 ++#define DISPLAY_PLANE_ENABLE (1<<31) ++#define DISPLAY_PLANE_DISABLE 0 ++#define DISPPLANE_GAMMA_ENABLE (1<<30) ++#define DISPPLANE_GAMMA_DISABLE 0 ++#define DISPPLANE_PIXFORMAT_MASK (0xf<<26) ++#define DISPPLANE_8BPP (0x2<<26) ++#define DISPPLANE_15_16BPP (0x4<<26) ++#define DISPPLANE_16BPP (0x5<<26) ++#define DISPPLANE_32BPP_NO_ALPHA (0x6<<26) ++#define DISPPLANE_32BPP (0x7<<26) ++#define DISPPLANE_STEREO_ENABLE (1<<25) ++#define DISPPLANE_STEREO_DISABLE 0 ++#define DISPPLANE_SEL_PIPE_MASK (1<<24) ++#define DISPPLANE_SEL_PIPE_A 0 ++#define DISPPLANE_SEL_PIPE_B (1<<24) ++#define DISPPLANE_SRC_KEY_ENABLE (1<<22) ++#define DISPPLANE_SRC_KEY_DISABLE 0 ++#define DISPPLANE_LINE_DOUBLE (1<<20) ++#define DISPPLANE_NO_LINE_DOUBLE 0 ++#define DISPPLANE_STEREO_POLARITY_FIRST 0 ++#define DISPPLANE_STEREO_POLARITY_SECOND (1<<18) ++#define DSPAADDR 0x70184 ++#define DSPASTRIDE 0x70188 ++#define DSPAPOS 0x7018C /* reserved */ ++#define DSPASIZE 0x70190 ++#define DSPASURF 0x7019C /* 965+ only */ ++#define DSPATILEOFF 0x701A4 /* 965+ only */ ++ ++/* VBIOS flags */ ++#define SWF00 0x71410 ++#define SWF01 0x71414 ++#define SWF02 0x71418 ++#define SWF03 0x7141c ++#define SWF04 0x71420 ++#define SWF05 0x71424 ++#define SWF06 0x71428 ++#define SWF10 0x70410 ++#define SWF11 0x70414 ++#define SWF14 0x71420 ++#define SWF30 0x72414 ++#define SWF31 0x72418 ++#define SWF32 0x7241c ++ ++/* Pipe B */ ++#define PIPEBDSL 0x71000 ++#define PIPEBCONF 0x71008 ++#define PIPEBSTAT 0x71024 ++#define PIPEBFRAMEHIGH 0x71040 ++#define PIPEBFRAMEPIXEL 0x71044 ++ ++/* Display B control */ ++#define DSPBCNTR 0x71180 ++#define DISPPLANE_ALPHA_TRANS_ENABLE (1<<15) ++#define DISPPLANE_ALPHA_TRANS_DISABLE 0 ++#define DISPPLANE_SPRITE_ABOVE_DISPLAY 0 ++#define DISPPLANE_SPRITE_ABOVE_OVERLAY (1) ++#define DSPBADDR 0x71184 ++#define DSPBSTRIDE 0x71188 ++#define DSPBPOS 0x7118C ++#define DSPBSIZE 0x71190 ++#define DSPBSURF 0x7119C ++#define DSPBTILEOFF 0x711A4 ++ ++/* VBIOS regs */ ++#define VGACNTRL 0x71400 ++# define VGA_DISP_DISABLE (1 << 31) ++# define VGA_2X_MODE (1 << 30) ++# define VGA_PIPE_B_SELECT (1 << 29) ++ ++#endif /* _I915_REG_H_ */ + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0003_i915.Add_support_for_MSI_and_interrupt_mitigation.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0003_i915.Add_support_for_MSI_and_interrupt_mitigation.patch new file mode 100644 index 000000000..70f91194e --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0003_i915.Add_support_for_MSI_and_interrupt_mitigation.patch @@ -0,0 +1,421 @@ +From: Eric Anholt <eric@anholt.net> +Date: Tue, 29 Jul 2008 19:10:39 +0000 (-0700) +Subject: i915: Add support for MSI and interrupt mitigation. +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=aae4223e2fd3b29ae8e070b7a16d8cfc70c6a0c0 + +i915: Add support for MSI and interrupt mitigation. + +Previous attempts at interrupt mitigation had been foiled by i915_wait_irq's +failure to update the sarea seqno value when the status page indicated that +the seqno had already been passed. MSI support has been seen to cut CPU +costs by up to 40% in some workloads by avoiding other expensive interrupt +handlers for frequent graphics interrupts. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/drm_irq.c ++++ b/drivers/gpu/drm/drm_irq.c +@@ -63,7 +63,7 @@ int drm_irq_by_busid(struct drm_device * + p->devnum != PCI_SLOT(dev->pdev->devfn) || p->funcnum != PCI_FUNC(dev->pdev->devfn)) + return -EINVAL; + +- p->irq = dev->irq; ++ p->irq = dev->pdev->irq; + + DRM_DEBUG("%d:%d:%d => IRQ %d\n", p->busnum, p->devnum, p->funcnum, + p->irq); +@@ -89,7 +89,7 @@ static int drm_irq_install(struct drm_de + if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) + return -EINVAL; + +- if (dev->irq == 0) ++ if (dev->pdev->irq == 0) + return -EINVAL; + + mutex_lock(&dev->struct_mutex); +@@ -107,7 +107,7 @@ static int drm_irq_install(struct drm_de + dev->irq_enabled = 1; + mutex_unlock(&dev->struct_mutex); + +- DRM_DEBUG("irq=%d\n", dev->irq); ++ DRM_DEBUG("irq=%d\n", dev->pdev->irq); + + if (drm_core_check_feature(dev, DRIVER_IRQ_VBL)) { + init_waitqueue_head(&dev->vbl_queue); +@@ -127,8 +127,12 @@ static int drm_irq_install(struct drm_de + if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) + sh_flags = IRQF_SHARED; + +- ret = request_irq(dev->irq, dev->driver->irq_handler, ++ ret = request_irq(dev->pdev->irq, dev->driver->irq_handler, + sh_flags, dev->devname, dev); ++ /* Expose the device irq number to drivers that want to export it for ++ * whatever reason. ++ */ ++ dev->irq = dev->pdev->irq; + if (ret < 0) { + mutex_lock(&dev->struct_mutex); + dev->irq_enabled = 0; +@@ -164,11 +168,11 @@ int drm_irq_uninstall(struct drm_device + if (!irq_enabled) + return -EINVAL; + +- DRM_DEBUG("irq=%d\n", dev->irq); ++ DRM_DEBUG("irq=%d\n", dev->pdev->irq); + + dev->driver->irq_uninstall(dev); + +- free_irq(dev->irq, dev); ++ free_irq(dev->pdev->irq, dev); + + dev->locked_tasklet_func = NULL; + +@@ -201,7 +205,7 @@ int drm_control(struct drm_device *dev, + if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) + return 0; + if (dev->if_version < DRM_IF_VERSION(1, 2) && +- ctl->irq != dev->irq) ++ ctl->irq != dev->pdev->irq) + return -EINVAL; + return drm_irq_install(dev); + case DRM_UNINST_HANDLER: +@@ -239,7 +243,7 @@ int drm_wait_vblank(struct drm_device *d + int ret = 0; + unsigned int flags, seq; + +- if ((!dev->irq) || (!dev->irq_enabled)) ++ if ((!dev->pdev->irq) || (!dev->irq_enabled)) + return -EINVAL; + + if (vblwait->request.type & +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -84,7 +84,7 @@ static int i915_dma_cleanup(struct drm_d + * may not have been called from userspace and after dev_private + * is freed, it's too late. + */ +- if (dev->irq) ++ if (dev->irq_enabled) + drm_irq_uninstall(dev); + + if (dev_priv->ring.virtual_start) { +@@ -644,7 +644,7 @@ static int i915_getparam(struct drm_devi + + switch (param->param) { + case I915_PARAM_IRQ_ACTIVE: +- value = dev->irq ? 1 : 0; ++ value = dev->irq_enabled; + break; + case I915_PARAM_ALLOW_BATCHBUFFER: + value = dev_priv->allow_batchbuffer ? 1 : 0; +@@ -763,6 +763,20 @@ int i915_driver_load(struct drm_device * + ret = drm_addmap(dev, base, size, _DRM_REGISTERS, + _DRM_KERNEL | _DRM_DRIVER, + &dev_priv->mmio_map); ++ ++ ++ /* On the 945G/GM, the chipset reports the MSI capability on the ++ * integrated graphics even though the support isn't actually there ++ * according to the published specs. It doesn't appear to function ++ * correctly in testing on 945G. ++ * This may be a side effect of MSI having been made available for PEG ++ * and the registers being closely associated. ++ */ ++ if (!IS_I945G(dev) && !IS_I945GM(dev)) ++ pci_enable_msi(dev->pdev); ++ ++ spin_lock_init(&dev_priv->user_irq_lock); ++ + return ret; + } + +@@ -770,6 +784,9 @@ int i915_driver_unload(struct drm_device + { + struct drm_i915_private *dev_priv = dev->dev_private; + ++ if (dev->pdev->msi_enabled) ++ pci_disable_msi(dev->pdev); ++ + if (dev_priv->mmio_map) + drm_rmmap(dev, dev_priv->mmio_map); + +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -105,6 +105,12 @@ typedef struct drm_i915_private { + wait_queue_head_t irq_queue; + atomic_t irq_received; + atomic_t irq_emitted; ++ /** Protects user_irq_refcount and irq_mask_reg */ ++ spinlock_t user_irq_lock; ++ /** Refcount for i915_user_irq_get() versus i915_user_irq_put(). */ ++ int user_irq_refcount; ++ /** Cached value of IMR to avoid reads in updating the bitfield */ ++ u32 irq_mask_reg; + + int tex_lru_log_granularity; + int allow_batchbuffer; +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -33,6 +33,31 @@ + + #define MAX_NOPID ((u32)~0) + ++/** These are the interrupts used by the driver */ ++#define I915_INTERRUPT_ENABLE_MASK (I915_USER_INTERRUPT | \ ++ I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | \ ++ I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) ++ ++static inline void ++i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask) ++{ ++ if ((dev_priv->irq_mask_reg & mask) != 0) { ++ dev_priv->irq_mask_reg &= ~mask; ++ I915_WRITE(IMR, dev_priv->irq_mask_reg); ++ (void) I915_READ(IMR); ++ } ++} ++ ++static inline void ++i915_disable_irq(drm_i915_private_t *dev_priv, u32 mask) ++{ ++ if ((dev_priv->irq_mask_reg & mask) != mask) { ++ dev_priv->irq_mask_reg |= mask; ++ I915_WRITE(IMR, dev_priv->irq_mask_reg); ++ (void) I915_READ(IMR); ++ } ++} ++ + /** + * Emit blits for scheduled buffer swaps. + * +@@ -229,46 +254,50 @@ irqreturn_t i915_driver_irq_handler(DRM_ + { + struct drm_device *dev = (struct drm_device *) arg; + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; +- u16 temp; + u32 pipea_stats, pipeb_stats; ++ u32 iir; + + pipea_stats = I915_READ(PIPEASTAT); + pipeb_stats = I915_READ(PIPEBSTAT); + +- temp = I915_READ16(IIR); +- +- temp &= (I915_USER_INTERRUPT | +- I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | +- I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT); +- +- DRM_DEBUG("%s flag=%08x\n", __FUNCTION__, temp); +- +- if (temp == 0) ++ if (dev->pdev->msi_enabled) ++ I915_WRITE(IMR, ~0); ++ iir = I915_READ(IIR); ++ ++ DRM_DEBUG("iir=%08x\n", iir); ++ ++ if (iir == 0) { ++ if (dev->pdev->msi_enabled) { ++ I915_WRITE(IMR, dev_priv->irq_mask_reg); ++ (void) I915_READ(IMR); ++ } + return IRQ_NONE; ++ } + +- I915_WRITE16(IIR, temp); +- (void) I915_READ16(IIR); +- DRM_READMEMORYBARRIER(); ++ I915_WRITE(IIR, iir); ++ if (dev->pdev->msi_enabled) ++ I915_WRITE(IMR, dev_priv->irq_mask_reg); ++ (void) I915_READ(IIR); /* Flush posted writes */ + + dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + +- if (temp & I915_USER_INTERRUPT) ++ if (iir & I915_USER_INTERRUPT) + DRM_WAKEUP(&dev_priv->irq_queue); + +- if (temp & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | +- I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) { ++ if (iir & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | ++ I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) { + int vblank_pipe = dev_priv->vblank_pipe; + + if ((vblank_pipe & + (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B)) + == (DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B)) { +- if (temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) ++ if (iir & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) + atomic_inc(&dev->vbl_received); +- if (temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) ++ if (iir & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) + atomic_inc(&dev->vbl_received2); +- } else if (((temp & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) && ++ } else if (((iir & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) && + (vblank_pipe & DRM_I915_VBLANK_PIPE_A)) || +- ((temp & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) && ++ ((iir & I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT) && + (vblank_pipe & DRM_I915_VBLANK_PIPE_B))) + atomic_inc(&dev->vbl_received); + +@@ -314,6 +343,27 @@ static int i915_emit_irq(struct drm_devi + return dev_priv->counter; + } + ++static void i915_user_irq_get(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; ++ ++ spin_lock(&dev_priv->user_irq_lock); ++ if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) ++ i915_enable_irq(dev_priv, I915_USER_INTERRUPT); ++ spin_unlock(&dev_priv->user_irq_lock); ++} ++ ++static void i915_user_irq_put(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; ++ ++ spin_lock(&dev_priv->user_irq_lock); ++ BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0); ++ if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) ++ i915_disable_irq(dev_priv, I915_USER_INTERRUPT); ++ spin_unlock(&dev_priv->user_irq_lock); ++} ++ + static int i915_wait_irq(struct drm_device * dev, int irq_nr) + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; +@@ -322,13 +372,17 @@ static int i915_wait_irq(struct drm_devi + DRM_DEBUG("irq_nr=%d breadcrumb=%d\n", irq_nr, + READ_BREADCRUMB(dev_priv)); + +- if (READ_BREADCRUMB(dev_priv) >= irq_nr) ++ if (READ_BREADCRUMB(dev_priv) >= irq_nr) { ++ dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + return 0; ++ } + + dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; + ++ i915_user_irq_get(dev); + DRM_WAIT_ON(ret, dev_priv->irq_queue, 3 * DRM_HZ, + READ_BREADCRUMB(dev_priv) >= irq_nr); ++ i915_user_irq_put(dev); + + if (ret == -EBUSY) { + DRM_ERROR("EBUSY -- rec: %d emitted: %d\n", +@@ -413,20 +467,6 @@ int i915_irq_wait(struct drm_device *dev + return i915_wait_irq(dev, irqwait->irq_seq); + } + +-static void i915_enable_interrupt (struct drm_device *dev) +-{ +- drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; +- u16 flag; +- +- flag = 0; +- if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_A) +- flag |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT; +- if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_B) +- flag |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; +- +- I915_WRITE16(IER, I915_USER_INTERRUPT | flag); +-} +- + /* Set the vblank monitor pipe + */ + int i915_vblank_pipe_set(struct drm_device *dev, void *data, +@@ -434,6 +474,7 @@ int i915_vblank_pipe_set(struct drm_devi + { + drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_vblank_pipe_t *pipe = data; ++ u32 enable_mask = 0, disable_mask = 0; + + if (!dev_priv) { + DRM_ERROR("called with no initialization\n"); +@@ -445,9 +486,20 @@ int i915_vblank_pipe_set(struct drm_devi + return -EINVAL; + } + +- dev_priv->vblank_pipe = pipe->pipe; ++ if (pipe->pipe & DRM_I915_VBLANK_PIPE_A) ++ enable_mask |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT; ++ else ++ disable_mask |= I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT; ++ ++ if (pipe->pipe & DRM_I915_VBLANK_PIPE_B) ++ enable_mask |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; ++ else ++ disable_mask |= I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; + +- i915_enable_interrupt (dev); ++ i915_enable_irq(dev_priv, enable_mask); ++ i915_disable_irq(dev_priv, disable_mask); ++ ++ dev_priv->vblank_pipe = pipe->pipe; + + return 0; + } +@@ -464,7 +516,7 @@ int i915_vblank_pipe_get(struct drm_devi + return -EINVAL; + } + +- flag = I915_READ(IER); ++ flag = I915_READ(IMR); + pipe->pipe = 0; + if (flag & I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT) + pipe->pipe |= DRM_I915_VBLANK_PIPE_A; +@@ -586,9 +638,9 @@ void i915_driver_irq_preinstall(struct d + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + +- I915_WRITE16(HWSTAM, 0xfffe); +- I915_WRITE16(IMR, 0x0); +- I915_WRITE16(IER, 0x0); ++ I915_WRITE(HWSTAM, 0xfffe); ++ I915_WRITE(IMR, 0x0); ++ I915_WRITE(IER, 0x0); + } + + void i915_driver_irq_postinstall(struct drm_device * dev) +@@ -601,7 +653,18 @@ void i915_driver_irq_postinstall(struct + + if (!dev_priv->vblank_pipe) + dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A; +- i915_enable_interrupt(dev); ++ ++ /* Set initial unmasked IRQs to just the selected vblank pipes. */ ++ dev_priv->irq_mask_reg = ~0; ++ if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_A) ++ dev_priv->irq_mask_reg &= ~I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT; ++ if (dev_priv->vblank_pipe & DRM_I915_VBLANK_PIPE_B) ++ dev_priv->irq_mask_reg &= ~I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; ++ ++ I915_WRITE(IMR, dev_priv->irq_mask_reg); ++ I915_WRITE(IER, I915_INTERRUPT_ENABLE_MASK); ++ (void) I915_READ(IER); ++ + DRM_INIT_WAITQUEUE(&dev_priv->irq_queue); + } + +@@ -613,10 +676,10 @@ void i915_driver_irq_uninstall(struct dr + if (!dev_priv) + return; + +- I915_WRITE16(HWSTAM, 0xffff); +- I915_WRITE16(IMR, 0xffff); +- I915_WRITE16(IER, 0x0); ++ I915_WRITE(HWSTAM, 0xffff); ++ I915_WRITE(IMR, 0xffff); ++ I915_WRITE(IER, 0x0); + +- temp = I915_READ16(IIR); +- I915_WRITE16(IIR, temp); ++ temp = I915_READ(IIR); ++ I915_WRITE(IIR, temp); + } + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0004_i915.Track_progress_inside_of_batchbuffers_for_determining_wedgedness.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0004_i915.Track_progress_inside_of_batchbuffers_for_determining_wedgedness.patch new file mode 100644 index 000000000..c391d16b7 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0004_i915.Track_progress_inside_of_batchbuffers_for_determining_wedgedness.patch @@ -0,0 +1,47 @@ +From: Keith Packard <keithp@keithp.com> +Date: Wed, 30 Jul 2008 19:21:20 +0000 (-0700) +Subject: i915: Track progress inside of batchbuffers for determining wedgedness. +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=f0740db2246e4217384e8de32de6ebb4fbd807c9 + +i915: Track progress inside of batchbuffers for determining wedgedness. + +This avoids early termination for long-running commands. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -40,11 +40,15 @@ int i915_wait_ring(struct drm_device * d + { + drm_i915_private_t *dev_priv = dev->dev_private; + drm_i915_ring_buffer_t *ring = &(dev_priv->ring); ++ u32 acthd_reg = IS_I965G(dev) ? ACTHD_I965 : ACTHD; ++ u32 last_acthd = I915_READ(acthd_reg); ++ u32 acthd; + u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + int i; + +- for (i = 0; i < 10000; i++) { ++ for (i = 0; i < 100000; i++) { + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; ++ acthd = I915_READ(acthd_reg); + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; +@@ -55,8 +59,13 @@ int i915_wait_ring(struct drm_device * d + + if (ring->head != last_head) + i = 0; ++ if (acthd != last_acthd) ++ i = 0; + + last_head = ring->head; ++ last_acthd = acthd; ++ msleep_interruptible(10); ++ + } + + return -EBUSY; + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0005_i915.remove_settable_use_mi_batchbuffer_start.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0005_i915.remove_settable_use_mi_batchbuffer_start.patch new file mode 100644 index 000000000..12362fef5 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0005_i915.remove_settable_use_mi_batchbuffer_start.patch @@ -0,0 +1,59 @@ +From: Keith Packard <keithp@keithp.com> +Date: Wed, 30 Jul 2008 19:28:47 +0000 (-0700) +Subject: i915: remove settable use_mi_batchbuffer_start +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=6fcd9a69a91c53d733870df20e095eea2b73620c + +i915: remove settable use_mi_batchbuffer_start + +The driver can know what hardware requires MI_BATCH_BUFFER vs +MI_BATCH_BUFFER_START; there's no reason to let user mode configure this. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -159,13 +159,6 @@ static int i915_initialize(struct drm_de + dev_priv->current_page = 0; + dev_priv->sarea_priv->pf_current_page = dev_priv->current_page; + +- /* We are using separate values as placeholders for mechanisms for +- * private backbuffer/depthbuffer usage. +- */ +- dev_priv->use_mi_batchbuffer_start = 0; +- if (IS_I965G(dev)) /* 965 doesn't support older method */ +- dev_priv->use_mi_batchbuffer_start = 1; +- + /* Allow hardware batchbuffers unless told otherwise. + */ + dev_priv->allow_batchbuffer = 1; +@@ -486,7 +479,7 @@ static int i915_dispatch_batchbuffer(str + return ret; + } + +- if (dev_priv->use_mi_batchbuffer_start) { ++ if (!IS_I830(dev) && !IS_845G(dev)) { + BEGIN_LP_RING(2); + if (IS_I965G(dev)) { + OUT_RING(MI_BATCH_BUFFER_START | (2 << 6) | MI_BATCH_NON_SECURE_I965); +@@ -697,8 +690,6 @@ static int i915_setparam(struct drm_devi + + switch (param->param) { + case I915_SETPARAM_USE_MI_BATCHBUFFER_START: +- if (!IS_I965G(dev)) +- dev_priv->use_mi_batchbuffer_start = param->value; + break; + case I915_SETPARAM_TEX_LRU_LOG_GRANULARITY: + dev_priv->tex_lru_log_granularity = param->value; +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -99,7 +99,6 @@ typedef struct drm_i915_private { + int front_offset; + int current_page; + int page_flipping; +- int use_mi_batchbuffer_start; + + wait_queue_head_t irq_queue; + atomic_t irq_received; + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0006_i915.Ignore_X_server_provided_mmio_address.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0006_i915.Ignore_X_server_provided_mmio_address.patch new file mode 100644 index 000000000..397f683af --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0006_i915.Ignore_X_server_provided_mmio_address.patch @@ -0,0 +1,42 @@ +From: Keith Packard <keithp@keithp.com> +Date: Wed, 30 Jul 2008 19:36:08 +0000 (-0700) +Subject: i915: Ignore X server provided mmio address +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=5d34a0e06e6e70b01ee070094322695b9e3f0029 + +i915: Ignore X server provided mmio address + +It is already correctly detected by the kernel for use in suspend/resume. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -121,13 +121,6 @@ static int i915_initialize(struct drm_de + return -EINVAL; + } + +- dev_priv->mmio_map = drm_core_findmap(dev, init->mmio_offset); +- if (!dev_priv->mmio_map) { +- i915_dma_cleanup(dev); +- DRM_ERROR("can not find mmio map!\n"); +- return -EINVAL; +- } +- + dev_priv->sarea_priv = (drm_i915_sarea_t *) + ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset); + +@@ -194,11 +187,6 @@ static int i915_dma_resume(struct drm_de + return -EINVAL; + } + +- if (!dev_priv->mmio_map) { +- DRM_ERROR("can not find mmio map!\n"); +- return -EINVAL; +- } +- + if (dev_priv->ring.map.handle == NULL) { + DRM_ERROR("can not ioremap virtual address for" + " ring buffer\n"); + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0007_i915.Initialize_hardware_status_page_at_device_load_when_possible.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0007_i915.Initialize_hardware_status_page_at_device_load_when_possible.patch new file mode 100644 index 000000000..cf646f01c --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0007_i915.Initialize_hardware_status_page_at_device_load_when_possible.patch @@ -0,0 +1,138 @@ +From: Keith Packard <keithp@keithp.com> +Date: Wed, 30 Jul 2008 20:03:43 +0000 (-0700) +Subject: i915: Initialize hardware status page at device load when possible. +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=ddb354254f88965f5f057e67ef775fbb4b35fef8 + +i915: Initialize hardware status page at device load when possible. + +Some chips were unstable with repeated setup/teardown of the hardware status +page. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -71,6 +71,52 @@ int i915_wait_ring(struct drm_device * d + return -EBUSY; + } + ++/** ++ * Sets up the hardware status page for devices that need a physical address ++ * in the register. ++ */ ++int i915_init_phys_hws(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ /* Program Hardware Status Page */ ++ dev_priv->status_page_dmah = ++ drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff); ++ ++ if (!dev_priv->status_page_dmah) { ++ DRM_ERROR("Can not allocate hardware status page\n"); ++ return -ENOMEM; ++ } ++ dev_priv->hw_status_page = dev_priv->status_page_dmah->vaddr; ++ dev_priv->dma_status_page = dev_priv->status_page_dmah->busaddr; ++ ++ memset(dev_priv->hw_status_page, 0, PAGE_SIZE); ++ ++ I915_WRITE(HWS_PGA, dev_priv->dma_status_page); ++ DRM_DEBUG("Enabled hardware status page\n"); ++ return 0; ++} ++ ++/** ++ * Frees the hardware status page, whether it's a physical address or a virtual ++ * address set up by the X Server. ++ */ ++void i915_free_hws(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ if (dev_priv->status_page_dmah) { ++ drm_pci_free(dev, dev_priv->status_page_dmah); ++ dev_priv->status_page_dmah = NULL; ++ } ++ ++ if (dev_priv->status_gfx_addr) { ++ dev_priv->status_gfx_addr = 0; ++ drm_core_ioremapfree(&dev_priv->hws_map, dev); ++ } ++ ++ /* Need to rewrite hardware status page */ ++ I915_WRITE(HWS_PGA, 0x1ffff000); ++} ++ + void i915_kernel_lost_context(struct drm_device * dev) + { + drm_i915_private_t *dev_priv = dev->dev_private; +@@ -103,18 +149,9 @@ static int i915_dma_cleanup(struct drm_d + dev_priv->ring.map.size = 0; + } + +- if (dev_priv->status_page_dmah) { +- drm_pci_free(dev, dev_priv->status_page_dmah); +- dev_priv->status_page_dmah = NULL; +- /* Need to rewrite hardware status page */ +- I915_WRITE(HWS_PGA, 0x1ffff000); +- } +- +- if (dev_priv->status_gfx_addr) { +- dev_priv->status_gfx_addr = 0; +- drm_core_ioremapfree(&dev_priv->hws_map, dev); +- I915_WRITE(HWS_PGA, 0x1ffff000); +- } ++ /* Clear the HWS virtual address at teardown */ ++ if (I915_NEED_GFX_HWS(dev)) ++ i915_free_hws(dev); + + return 0; + } +@@ -165,23 +202,6 @@ static int i915_initialize(struct drm_de + */ + dev_priv->allow_batchbuffer = 1; + +- /* Program Hardware Status Page */ +- if (!I915_NEED_GFX_HWS(dev)) { +- dev_priv->status_page_dmah = +- drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff); +- +- if (!dev_priv->status_page_dmah) { +- i915_dma_cleanup(dev); +- DRM_ERROR("Can not allocate hardware status page\n"); +- return -ENOMEM; +- } +- dev_priv->hw_status_page = dev_priv->status_page_dmah->vaddr; +- dev_priv->dma_status_page = dev_priv->status_page_dmah->busaddr; +- +- memset(dev_priv->hw_status_page, 0, PAGE_SIZE); +- I915_WRITE(HWS_PGA, dev_priv->dma_status_page); +- } +- DRM_DEBUG("Enabled hardware status page\n"); + return 0; + } + +@@ -773,6 +793,12 @@ int i915_driver_load(struct drm_device * + _DRM_KERNEL | _DRM_DRIVER, + &dev_priv->mmio_map); + ++ /* Init HWS */ ++ if (!I915_NEED_GFX_HWS(dev)) { ++ ret = i915_init_phys_hws(dev); ++ if (ret != 0) ++ return ret; ++ } + + /* On the 945G/GM, the chipset reports the MSI capability on the + * integrated graphics even though the support isn't actually there +@@ -796,6 +822,8 @@ int i915_driver_unload(struct drm_device + if (dev->pdev->msi_enabled) + pci_disable_msi(dev->pdev); + ++ i915_free_hws(dev); ++ + if (dev_priv->mmio_map) + drm_rmmap(dev, dev_priv->mmio_map); + + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0008_drm.Add_GEM_graphics_execution_manager_to_i915_driver.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0008_drm.Add_GEM_graphics_execution_manager_to_i915_driver.patch new file mode 100644 index 000000000..e7ae851c4 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0008_drm.Add_GEM_graphics_execution_manager_to_i915_driver.patch @@ -0,0 +1,5453 @@ +From: Eric Anholt <eric@anholt.net> +Date: Wed, 30 Jul 2008 19:06:12 +0000 (-0700) +Subject: drm: Add GEM ("graphics execution manager") to i915 driver. +X-Git-Tag: v2.6.12-rc2 +X-Git-Url: http://gitweb.freedesktop.org/?p=users/anholt/anholt/linux-2.6.git;a=commitdiff;h=drm-gem-merge + +drm: Add GEM ("graphics execution manager") to i915 driver. + +GEM allows the creation of persistent buffer objects accessible by the +graphics device through new ioctls for managing execution of commands on the +device. The userland API is almost entirely driver-specific to ensure that +any driver building on this model can easily map the interface to individual +driver requirements. + +GEM is used by the 2d driver for managing its internal state allocations and +will be used for pixmap storage to reduce memory consumption and enable +zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable +GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -4,8 +4,9 @@ + + ccflags-y := -Iinclude/drm + +-drm-y := drm_auth.o drm_bufs.o drm_context.o drm_dma.o drm_drawable.o \ +- drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \ ++drm-y := drm_auth.o drm_bufs.o drm_cache.o \ ++ drm_context.o drm_dma.o drm_drawable.o \ ++ drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \ + drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \ + drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ + drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o +--- a/drivers/gpu/drm/drm_agpsupport.c ++++ b/drivers/gpu/drm/drm_agpsupport.c +@@ -33,6 +33,7 @@ + + #include "drmP.h" + #include <linux/module.h> ++#include <asm/agp.h> + + #if __OS_HAS_AGP + +@@ -452,4 +453,52 @@ int drm_agp_unbind_memory(DRM_AGP_MEM * + return agp_unbind_memory(handle); + } + +-#endif /* __OS_HAS_AGP */ ++/** ++ * Binds a collection of pages into AGP memory at the given offset, returning ++ * the AGP memory structure containing them. ++ * ++ * No reference is held on the pages during this time -- it is up to the ++ * caller to handle that. ++ */ ++DRM_AGP_MEM * ++drm_agp_bind_pages(struct drm_device *dev, ++ struct page **pages, ++ unsigned long num_pages, ++ uint32_t gtt_offset) ++{ ++ DRM_AGP_MEM *mem; ++ int ret, i; ++ ++ DRM_DEBUG("\n"); ++ ++ mem = drm_agp_allocate_memory(dev->agp->bridge, num_pages, ++ AGP_USER_MEMORY); ++ if (mem == NULL) { ++ DRM_ERROR("Failed to allocate memory for %ld pages\n", ++ num_pages); ++ return NULL; ++ } ++ ++ for (i = 0; i < num_pages; i++) ++ mem->memory[i] = phys_to_gart(page_to_phys(pages[i])); ++ mem->page_count = num_pages; ++ ++ mem->is_flushed = true; ++ ret = drm_agp_bind_memory(mem, gtt_offset / PAGE_SIZE); ++ if (ret != 0) { ++ DRM_ERROR("Failed to bind AGP memory: %d\n", ret); ++ agp_free_memory(mem); ++ return NULL; ++ } ++ ++ return mem; ++} ++EXPORT_SYMBOL(drm_agp_bind_pages); ++ ++void drm_agp_chipset_flush(struct drm_device *dev) ++{ ++ agp_flush_chipset(dev->agp->bridge); ++} ++EXPORT_SYMBOL(drm_agp_chipset_flush); ++ ++#endif /* __OS_HAS_AGP */ +--- /dev/null ++++ b/drivers/gpu/drm/drm_cache.c +@@ -0,0 +1,76 @@ ++/************************************************************************** ++ * ++ * Copyright (c) 2006-2007 Tungsten Graphics, Inc., Cedar Park, TX., USA ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE ++ * USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ **************************************************************************/ ++/* ++ * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> ++ */ ++ ++#include "drmP.h" ++ ++#if defined(CONFIG_X86) ++static void ++drm_clflush_page(struct page *page) ++{ ++ uint8_t *page_virtual; ++ unsigned int i; ++ ++ if (unlikely(page == NULL)) ++ return; ++ ++ page_virtual = kmap_atomic(page, KM_USER0); ++ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) ++ clflush(page_virtual + i); ++ kunmap_atomic(page_virtual, KM_USER0); ++} ++#endif ++ ++static void ++drm_clflush_ipi_handler(void *null) ++{ ++ wbinvd(); ++} ++ ++void ++drm_clflush_pages(struct page *pages[], unsigned long num_pages) ++{ ++ ++#if defined(CONFIG_X86) ++ if (cpu_has_clflush) { ++ unsigned long i; ++ ++ mb(); ++ for (i = 0; i < num_pages; ++i) ++ drm_clflush_page(*pages++); ++ mb(); ++ ++ return; ++ } ++#endif ++ ++ if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) ++ DRM_ERROR("Timed out waiting for cache flush.\n"); ++} ++EXPORT_SYMBOL(drm_clflush_pages); +--- a/drivers/gpu/drm/drm_drv.c ++++ b/drivers/gpu/drm/drm_drv.c +@@ -117,6 +117,10 @@ static struct drm_ioctl_desc drm_ioctls[ + DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank, 0), + + DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_update_drawable_info, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), ++ ++ DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH), + }; + + #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) +--- a/drivers/gpu/drm/drm_fops.c ++++ b/drivers/gpu/drm/drm_fops.c +@@ -256,6 +256,9 @@ static int drm_open_helper(struct inode + + INIT_LIST_HEAD(&priv->lhead); + ++ if (dev->driver->driver_features & DRIVER_GEM) ++ drm_gem_open(dev, priv); ++ + if (dev->driver->open) { + ret = dev->driver->open(dev, priv); + if (ret < 0) +@@ -400,6 +403,9 @@ int drm_release(struct inode *inode, str + dev->driver->reclaim_buffers(dev, file_priv); + } + ++ if (dev->driver->driver_features & DRIVER_GEM) ++ drm_gem_release(dev, file_priv); ++ + drm_fasync(-1, filp, 0); + + mutex_lock(&dev->ctxlist_mutex); +--- /dev/null ++++ b/drivers/gpu/drm/drm_gem.c +@@ -0,0 +1,420 @@ ++/* ++ * Copyright © 2008 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Eric Anholt <eric@anholt.net> ++ * ++ */ ++ ++#include <linux/types.h> ++#include <linux/slab.h> ++#include <linux/mm.h> ++#include <linux/uaccess.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/module.h> ++#include <linux/mman.h> ++#include <linux/pagemap.h> ++#include "drmP.h" ++ ++/** @file drm_gem.c ++ * ++ * This file provides some of the base ioctls and library routines for ++ * the graphics memory manager implemented by each device driver. ++ * ++ * Because various devices have different requirements in terms of ++ * synchronization and migration strategies, implementing that is left up to ++ * the driver, and all that the general API provides should be generic -- ++ * allocating objects, reading/writing data with the cpu, freeing objects. ++ * Even there, platform-dependent optimizations for reading/writing data with ++ * the CPU mean we'll likely hook those out to driver-specific calls. However, ++ * the DRI2 implementation wants to have at least allocate/mmap be generic. ++ * ++ * The goal was to have swap-backed object allocation managed through ++ * struct file. However, file descriptors as handles to a struct file have ++ * two major failings: ++ * - Process limits prevent more than 1024 or so being used at a time by ++ * default. ++ * - Inability to allocate high fds will aggravate the X Server's select() ++ * handling, and likely that of many GL client applications as well. ++ * ++ * This led to a plan of using our own integer IDs (called handles, following ++ * DRM terminology) to mimic fds, and implement the fd syscalls we need as ++ * ioctls. The objects themselves will still include the struct file so ++ * that we can transition to fds if the required kernel infrastructure shows ++ * up at a later date, and as our interface with shmfs for memory allocation. ++ */ ++ ++/** ++ * Initialize the GEM device fields ++ */ ++ ++int ++drm_gem_init(struct drm_device *dev) ++{ ++ spin_lock_init(&dev->object_name_lock); ++ idr_init(&dev->object_name_idr); ++ atomic_set(&dev->object_count, 0); ++ atomic_set(&dev->object_memory, 0); ++ atomic_set(&dev->pin_count, 0); ++ atomic_set(&dev->pin_memory, 0); ++ atomic_set(&dev->gtt_count, 0); ++ atomic_set(&dev->gtt_memory, 0); ++ return 0; ++} ++ ++/** ++ * Allocate a GEM object of the specified size with shmfs backing store ++ */ ++struct drm_gem_object * ++drm_gem_object_alloc(struct drm_device *dev, size_t size) ++{ ++ struct drm_gem_object *obj; ++ ++ BUG_ON((size & (PAGE_SIZE - 1)) != 0); ++ ++ obj = kcalloc(1, sizeof(*obj), GFP_KERNEL); ++ ++ obj->dev = dev; ++ obj->filp = shmem_file_setup("drm mm object", size, 0); ++ if (IS_ERR(obj->filp)) { ++ kfree(obj); ++ return NULL; ++ } ++ ++ kref_init(&obj->refcount); ++ kref_init(&obj->handlecount); ++ obj->size = size; ++ if (dev->driver->gem_init_object != NULL && ++ dev->driver->gem_init_object(obj) != 0) { ++ fput(obj->filp); ++ kfree(obj); ++ return NULL; ++ } ++ atomic_inc(&dev->object_count); ++ atomic_add(obj->size, &dev->object_memory); ++ return obj; ++} ++EXPORT_SYMBOL(drm_gem_object_alloc); ++ ++/** ++ * Removes the mapping from handle to filp for this object. ++ */ ++static int ++drm_gem_handle_delete(struct drm_file *filp, int handle) ++{ ++ struct drm_device *dev; ++ struct drm_gem_object *obj; ++ ++ /* This is gross. The idr system doesn't let us try a delete and ++ * return an error code. It just spews if you fail at deleting. ++ * So, we have to grab a lock around finding the object and then ++ * doing the delete on it and dropping the refcount, or the user ++ * could race us to double-decrement the refcount and cause a ++ * use-after-free later. Given the frequency of our handle lookups, ++ * we may want to use ida for number allocation and a hash table ++ * for the pointers, anyway. ++ */ ++ spin_lock(&filp->table_lock); ++ ++ /* Check if we currently have a reference on the object */ ++ obj = idr_find(&filp->object_idr, handle); ++ if (obj == NULL) { ++ spin_unlock(&filp->table_lock); ++ return -EINVAL; ++ } ++ dev = obj->dev; ++ ++ /* Release reference and decrement refcount. */ ++ idr_remove(&filp->object_idr, handle); ++ spin_unlock(&filp->table_lock); ++ ++ mutex_lock(&dev->struct_mutex); ++ drm_gem_object_handle_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++/** ++ * Create a handle for this object. This adds a handle reference ++ * to the object, which includes a regular reference count. Callers ++ * will likely want to dereference the object afterwards. ++ */ ++int ++drm_gem_handle_create(struct drm_file *file_priv, ++ struct drm_gem_object *obj, ++ int *handlep) ++{ ++ int ret; ++ ++ /* ++ * Get the user-visible handle using idr. ++ */ ++again: ++ /* ensure there is space available to allocate a handle */ ++ if (idr_pre_get(&file_priv->object_idr, GFP_KERNEL) == 0) ++ return -ENOMEM; ++ ++ /* do the allocation under our spinlock */ ++ spin_lock(&file_priv->table_lock); ++ ret = idr_get_new_above(&file_priv->object_idr, obj, 1, handlep); ++ spin_unlock(&file_priv->table_lock); ++ if (ret == -EAGAIN) ++ goto again; ++ ++ if (ret != 0) ++ return ret; ++ ++ drm_gem_object_handle_reference(obj); ++ return 0; ++} ++EXPORT_SYMBOL(drm_gem_handle_create); ++ ++/** Returns a reference to the object named by the handle. */ ++struct drm_gem_object * ++drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, ++ int handle) ++{ ++ struct drm_gem_object *obj; ++ ++ spin_lock(&filp->table_lock); ++ ++ /* Check if we currently have a reference on the object */ ++ obj = idr_find(&filp->object_idr, handle); ++ if (obj == NULL) { ++ spin_unlock(&filp->table_lock); ++ return NULL; ++ } ++ ++ drm_gem_object_reference(obj); ++ ++ spin_unlock(&filp->table_lock); ++ ++ return obj; ++} ++EXPORT_SYMBOL(drm_gem_object_lookup); ++ ++/** ++ * Releases the handle to an mm object. ++ */ ++int ++drm_gem_close_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_gem_close *args = data; ++ int ret; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ ret = drm_gem_handle_delete(file_priv, args->handle); ++ ++ return ret; ++} ++ ++/** ++ * Create a global name for an object, returning the name. ++ * ++ * Note that the name does not hold a reference; when the object ++ * is freed, the name goes away. ++ */ ++int ++drm_gem_flink_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_gem_flink *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EINVAL; ++ ++again: ++ if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) ++ return -ENOMEM; ++ ++ spin_lock(&dev->object_name_lock); ++ if (obj->name) { ++ spin_unlock(&dev->object_name_lock); ++ return -EEXIST; ++ } ++ ret = idr_get_new_above(&dev->object_name_idr, obj, 1, ++ &obj->name); ++ spin_unlock(&dev->object_name_lock); ++ if (ret == -EAGAIN) ++ goto again; ++ ++ if (ret != 0) { ++ mutex_lock(&dev->struct_mutex); ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++ } ++ ++ /* ++ * Leave the reference from the lookup around as the ++ * name table now holds one ++ */ ++ args->name = (uint64_t) obj->name; ++ ++ return 0; ++} ++ ++/** ++ * Open an object using the global name, returning a handle and the size. ++ * ++ * This handle (of course) holds a reference to the object, so the object ++ * will not go away until the handle is deleted. ++ */ ++int ++drm_gem_open_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_gem_open *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ int handle; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ spin_lock(&dev->object_name_lock); ++ obj = idr_find(&dev->object_name_idr, (int) args->name); ++ if (obj) ++ drm_gem_object_reference(obj); ++ spin_unlock(&dev->object_name_lock); ++ if (!obj) ++ return -ENOENT; ++ ++ ret = drm_gem_handle_create(file_priv, obj, &handle); ++ mutex_lock(&dev->struct_mutex); ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ if (ret) ++ return ret; ++ ++ args->handle = handle; ++ args->size = obj->size; ++ ++ return 0; ++} ++ ++/** ++ * Called at device open time, sets up the structure for handling refcounting ++ * of mm objects. ++ */ ++void ++drm_gem_open(struct drm_device *dev, struct drm_file *file_private) ++{ ++ idr_init(&file_private->object_idr); ++ spin_lock_init(&file_private->table_lock); ++} ++ ++/** ++ * Called at device close to release the file's ++ * handle references on objects. ++ */ ++static int ++drm_gem_object_release_handle(int id, void *ptr, void *data) ++{ ++ struct drm_gem_object *obj = ptr; ++ ++ drm_gem_object_handle_unreference(obj); ++ ++ return 0; ++} ++ ++/** ++ * Called at close time when the filp is going away. ++ * ++ * Releases any remaining references on objects by this filp. ++ */ ++void ++drm_gem_release(struct drm_device *dev, struct drm_file *file_private) ++{ ++ mutex_lock(&dev->struct_mutex); ++ idr_for_each(&file_private->object_idr, ++ &drm_gem_object_release_handle, NULL); ++ ++ idr_destroy(&file_private->object_idr); ++ mutex_unlock(&dev->struct_mutex); ++} ++ ++/** ++ * Called after the last reference to the object has been lost. ++ * ++ * Frees the object ++ */ ++void ++drm_gem_object_free(struct kref *kref) ++{ ++ struct drm_gem_object *obj = (struct drm_gem_object *) kref; ++ struct drm_device *dev = obj->dev; ++ ++ BUG_ON(!mutex_is_locked(&dev->struct_mutex)); ++ ++ if (dev->driver->gem_free_object != NULL) ++ dev->driver->gem_free_object(obj); ++ ++ fput(obj->filp); ++ atomic_dec(&dev->object_count); ++ atomic_sub(obj->size, &dev->object_memory); ++ kfree(obj); ++} ++EXPORT_SYMBOL(drm_gem_object_free); ++ ++/** ++ * Called after the last handle to the object has been closed ++ * ++ * Removes any name for the object. Note that this must be ++ * called before drm_gem_object_free or we'll be touching ++ * freed memory ++ */ ++void ++drm_gem_object_handle_free(struct kref *kref) ++{ ++ struct drm_gem_object *obj = container_of(kref, ++ struct drm_gem_object, ++ handlecount); ++ struct drm_device *dev = obj->dev; ++ ++ /* Remove any name for this object */ ++ spin_lock(&dev->object_name_lock); ++ if (obj->name) { ++ idr_remove(&dev->object_name_idr, obj->name); ++ spin_unlock(&dev->object_name_lock); ++ /* ++ * The object name held a reference to this object, drop ++ * that now. ++ */ ++ drm_gem_object_unreference(obj); ++ } else ++ spin_unlock(&dev->object_name_lock); ++ ++} ++EXPORT_SYMBOL(drm_gem_object_handle_free); ++ +--- a/drivers/gpu/drm/drm_memory.c ++++ b/drivers/gpu/drm/drm_memory.c +@@ -133,6 +133,7 @@ int drm_free_agp(DRM_AGP_MEM * handle, i + { + return drm_agp_free_memory(handle) ? 0 : -EINVAL; + } ++EXPORT_SYMBOL(drm_free_agp); + + /** Wrapper around agp_bind_memory() */ + int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start) +@@ -145,6 +146,7 @@ int drm_unbind_agp(DRM_AGP_MEM * handle) + { + return drm_agp_unbind_memory(handle); + } ++EXPORT_SYMBOL(drm_unbind_agp); + + #else /* __OS_HAS_AGP */ + static inline void *agp_remap(unsigned long offset, unsigned long size, +--- a/drivers/gpu/drm/drm_mm.c ++++ b/drivers/gpu/drm/drm_mm.c +@@ -169,6 +169,7 @@ struct drm_mm_node *drm_mm_get_block(str + + return child; + } ++EXPORT_SYMBOL(drm_mm_get_block); + + /* + * Put a block. Merge with the previous and / or next block if they are free. +@@ -217,6 +218,7 @@ void drm_mm_put_block(struct drm_mm_node + drm_free(cur, sizeof(*cur), DRM_MEM_MM); + } + } ++EXPORT_SYMBOL(drm_mm_put_block); + + struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm, + unsigned long size, +@@ -265,6 +267,7 @@ int drm_mm_clean(struct drm_mm * mm) + + return (head->next->next == head); + } ++EXPORT_SYMBOL(drm_mm_search_free); + + int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size) + { +@@ -273,7 +276,7 @@ int drm_mm_init(struct drm_mm * mm, unsi + + return drm_mm_create_tail_node(mm, start, size); + } +- ++EXPORT_SYMBOL(drm_mm_init); + + void drm_mm_takedown(struct drm_mm * mm) + { +--- a/drivers/gpu/drm/drm_proc.c ++++ b/drivers/gpu/drm/drm_proc.c +@@ -49,6 +49,10 @@ static int drm_queues_info(char *buf, ch + int request, int *eof, void *data); + static int drm_bufs_info(char *buf, char **start, off_t offset, + int request, int *eof, void *data); ++static int drm_gem_name_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data); ++static int drm_gem_object_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data); + #if DRM_DEBUG_CODE + static int drm_vma_info(char *buf, char **start, off_t offset, + int request, int *eof, void *data); +@@ -60,13 +64,16 @@ static int drm_vma_info(char *buf, char + static struct drm_proc_list { + const char *name; /**< file name */ + int (*f) (char *, char **, off_t, int, int *, void *); /**< proc callback*/ ++ u32 driver_features; /**< Required driver features for this entry */ + } drm_proc_list[] = { +- {"name", drm_name_info}, +- {"mem", drm_mem_info}, +- {"vm", drm_vm_info}, +- {"clients", drm_clients_info}, +- {"queues", drm_queues_info}, +- {"bufs", drm_bufs_info}, ++ {"name", drm_name_info, 0}, ++ {"mem", drm_mem_info, 0}, ++ {"vm", drm_vm_info, 0}, ++ {"clients", drm_clients_info, 0}, ++ {"queues", drm_queues_info, 0}, ++ {"bufs", drm_bufs_info, 0}, ++ {"gem_names", drm_gem_name_info, DRIVER_GEM}, ++ {"gem_objects", drm_gem_object_info, DRIVER_GEM}, + #if DRM_DEBUG_CODE + {"vma", drm_vma_info}, + #endif +@@ -90,8 +97,9 @@ static struct drm_proc_list { + int drm_proc_init(struct drm_minor *minor, int minor_id, + struct proc_dir_entry *root) + { ++ struct drm_device *dev = minor->dev; + struct proc_dir_entry *ent; +- int i, j; ++ int i, j, ret; + char name[64]; + + sprintf(name, "%d", minor_id); +@@ -102,23 +110,42 @@ int drm_proc_init(struct drm_minor *mino + } + + for (i = 0; i < DRM_PROC_ENTRIES; i++) { ++ u32 features = drm_proc_list[i].driver_features; ++ ++ if (features != 0 && ++ (dev->driver->driver_features & features) != features) ++ continue; ++ + ent = create_proc_entry(drm_proc_list[i].name, + S_IFREG | S_IRUGO, minor->dev_root); + if (!ent) { + DRM_ERROR("Cannot create /proc/dri/%s/%s\n", + name, drm_proc_list[i].name); +- for (j = 0; j < i; j++) +- remove_proc_entry(drm_proc_list[i].name, +- minor->dev_root); +- remove_proc_entry(name, root); +- minor->dev_root = NULL; +- return -1; ++ ret = -1; ++ goto fail; + } + ent->read_proc = drm_proc_list[i].f; + ent->data = minor; + } + ++ if (dev->driver->proc_init) { ++ ret = dev->driver->proc_init(minor); ++ if (ret) { ++ DRM_ERROR("DRM: Driver failed to initialize " ++ "/proc/dri.\n"); ++ goto fail; ++ } ++ } ++ + return 0; ++ fail: ++ ++ for (j = 0; j < i; j++) ++ remove_proc_entry(drm_proc_list[i].name, ++ minor->dev_root); ++ remove_proc_entry(name, root); ++ minor->dev_root = NULL; ++ return ret; + } + + /** +@@ -133,12 +160,16 @@ int drm_proc_init(struct drm_minor *mino + */ + int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root) + { ++ struct drm_device *dev = minor->dev; + int i; + char name[64]; + + if (!root || !minor->dev_root) + return 0; + ++ if (dev->driver->proc_cleanup) ++ dev->driver->proc_cleanup(minor); ++ + for (i = 0; i < DRM_PROC_ENTRIES; i++) + remove_proc_entry(drm_proc_list[i].name, minor->dev_root); + sprintf(name, "%d", minor->index); +@@ -480,6 +511,84 @@ static int drm_clients_info(char *buf, c + return ret; + } + ++struct drm_gem_name_info_data { ++ int len; ++ char *buf; ++ int eof; ++}; ++ ++static int drm_gem_one_name_info(int id, void *ptr, void *data) ++{ ++ struct drm_gem_object *obj = ptr; ++ struct drm_gem_name_info_data *nid = data; ++ ++ DRM_INFO("name %d size %d\n", obj->name, obj->size); ++ if (nid->eof) ++ return 0; ++ ++ nid->len += sprintf(&nid->buf[nid->len], ++ "%6d%9d%8d%9d\n", ++ obj->name, obj->size, ++ atomic_read(&obj->handlecount.refcount), ++ atomic_read(&obj->refcount.refcount)); ++ if (nid->len > DRM_PROC_LIMIT) { ++ nid->eof = 1; ++ return 0; ++ } ++ return 0; ++} ++ ++static int drm_gem_name_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ struct drm_gem_name_info_data nid; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ nid.len = sprintf(buf, " name size handles refcount\n"); ++ nid.buf = buf; ++ nid.eof = 0; ++ idr_for_each(&dev->object_name_idr, drm_gem_one_name_info, &nid); ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ if (nid.len > request + offset) ++ return request; ++ *eof = 1; ++ return nid.len - offset; ++} ++ ++static int drm_gem_object_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("%d objects\n", atomic_read(&dev->object_count)); ++ DRM_PROC_PRINT("%d object bytes\n", atomic_read(&dev->object_memory)); ++ DRM_PROC_PRINT("%d pinned\n", atomic_read(&dev->pin_count)); ++ DRM_PROC_PRINT("%d pin bytes\n", atomic_read(&dev->pin_memory)); ++ DRM_PROC_PRINT("%d gtt bytes\n", atomic_read(&dev->gtt_memory)); ++ DRM_PROC_PRINT("%d gtt total\n", dev->gtt_total); ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ + #if DRM_DEBUG_CODE + + static int drm__vma_info(char *buf, char **start, off_t offset, int request, +--- a/drivers/gpu/drm/drm_stub.c ++++ b/drivers/gpu/drm/drm_stub.c +@@ -152,6 +152,15 @@ static int drm_fill_in_dev(struct drm_de + goto error_out_unreg; + } + ++ if (driver->driver_features & DRIVER_GEM) { ++ retcode = drm_gem_init(dev); ++ if (retcode) { ++ DRM_ERROR("Cannot initialize graphics execution " ++ "manager (GEM)\n"); ++ goto error_out_unreg; ++ } ++ } ++ + return 0; + + error_out_unreg: +@@ -317,6 +326,7 @@ int drm_put_dev(struct drm_device * dev) + int drm_put_minor(struct drm_minor **minor_p) + { + struct drm_minor *minor = *minor_p; ++ + DRM_DEBUG("release secondary minor %d\n", minor->index); + + if (minor->type == DRM_MINOR_LEGACY) +--- a/drivers/gpu/drm/i915/Makefile ++++ b/drivers/gpu/drm/i915/Makefile +@@ -3,7 +3,11 @@ + # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + + ccflags-y := -Iinclude/drm +-i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o ++i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ ++ i915_gem.o \ ++ i915_gem_debug.o \ ++ i915_gem_proc.o \ ++ i915_gem_tiling.o + + i915-$(CONFIG_COMPAT) += i915_ioc32.o + +--- a/drivers/gpu/drm/i915/i915_dma.c ++++ b/drivers/gpu/drm/i915/i915_dma.c +@@ -170,24 +170,31 @@ static int i915_initialize(struct drm_de + dev_priv->sarea_priv = (drm_i915_sarea_t *) + ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset); + +- dev_priv->ring.Start = init->ring_start; +- dev_priv->ring.End = init->ring_end; +- dev_priv->ring.Size = init->ring_size; +- dev_priv->ring.tail_mask = dev_priv->ring.Size - 1; +- +- dev_priv->ring.map.offset = init->ring_start; +- dev_priv->ring.map.size = init->ring_size; +- dev_priv->ring.map.type = 0; +- dev_priv->ring.map.flags = 0; +- dev_priv->ring.map.mtrr = 0; ++ if (init->ring_size != 0) { ++ if (dev_priv->ring.ring_obj != NULL) { ++ i915_dma_cleanup(dev); ++ DRM_ERROR("Client tried to initialize ringbuffer in " ++ "GEM mode\n"); ++ return -EINVAL; ++ } + +- drm_core_ioremap(&dev_priv->ring.map, dev); ++ dev_priv->ring.Size = init->ring_size; ++ dev_priv->ring.tail_mask = dev_priv->ring.Size - 1; + +- if (dev_priv->ring.map.handle == NULL) { +- i915_dma_cleanup(dev); +- DRM_ERROR("can not ioremap virtual address for" +- " ring buffer\n"); +- return -ENOMEM; ++ dev_priv->ring.map.offset = init->ring_start; ++ dev_priv->ring.map.size = init->ring_size; ++ dev_priv->ring.map.type = 0; ++ dev_priv->ring.map.flags = 0; ++ dev_priv->ring.map.mtrr = 0; ++ ++ drm_core_ioremap(&dev_priv->ring.map, dev); ++ ++ if (dev_priv->ring.map.handle == NULL) { ++ i915_dma_cleanup(dev); ++ DRM_ERROR("can not ioremap virtual address for" ++ " ring buffer\n"); ++ return -ENOMEM; ++ } + } + + dev_priv->ring.virtual_start = dev_priv->ring.map.handle; +@@ -377,9 +384,10 @@ static int i915_emit_cmds(struct drm_dev + return 0; + } + +-static int i915_emit_box(struct drm_device * dev, +- struct drm_clip_rect __user * boxes, +- int i, int DR1, int DR4) ++int ++i915_emit_box(struct drm_device *dev, ++ struct drm_clip_rect __user *boxes, ++ int i, int DR1, int DR4) + { + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_clip_rect box; +@@ -681,6 +689,9 @@ static int i915_getparam(struct drm_devi + case I915_PARAM_LAST_DISPATCH: + value = READ_BREADCRUMB(dev_priv); + break; ++ case I915_PARAM_HAS_GEM: ++ value = 1; ++ break; + default: + DRM_ERROR("Unknown parameter %d\n", param->param); + return -EINVAL; +@@ -784,6 +795,7 @@ int i915_driver_load(struct drm_device * + memset(dev_priv, 0, sizeof(drm_i915_private_t)); + + dev->dev_private = (void *)dev_priv; ++ dev_priv->dev = dev; + + /* Add register map (needed for suspend/resume) */ + base = drm_get_resource_start(dev, mmio_bar); +@@ -793,6 +805,8 @@ int i915_driver_load(struct drm_device * + _DRM_KERNEL | _DRM_DRIVER, + &dev_priv->mmio_map); + ++ i915_gem_load(dev); ++ + /* Init HWS */ + if (!I915_NEED_GFX_HWS(dev)) { + ret = i915_init_phys_hws(dev); +@@ -833,6 +847,25 @@ int i915_driver_unload(struct drm_device + return 0; + } + ++int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv) ++{ ++ struct drm_i915_file_private *i915_file_priv; ++ ++ DRM_DEBUG("\n"); ++ i915_file_priv = (struct drm_i915_file_private *) ++ drm_alloc(sizeof(*i915_file_priv), DRM_MEM_FILES); ++ ++ if (!i915_file_priv) ++ return -ENOMEM; ++ ++ file_priv->driver_priv = i915_file_priv; ++ ++ i915_file_priv->mm.last_gem_seqno = 0; ++ i915_file_priv->mm.last_gem_throttle_seqno = 0; ++ ++ return 0; ++} ++ + void i915_driver_lastclose(struct drm_device * dev) + { + drm_i915_private_t *dev_priv = dev->dev_private; +@@ -840,6 +873,8 @@ void i915_driver_lastclose(struct drm_de + if (!dev_priv) + return; + ++ i915_gem_lastclose(dev); ++ + if (dev_priv->agp_heap) + i915_mem_takedown(&(dev_priv->agp_heap)); + +@@ -852,6 +887,13 @@ void i915_driver_preclose(struct drm_dev + i915_mem_release(dev, file_priv, dev_priv->agp_heap); + } + ++void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) ++{ ++ struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; ++ ++ drm_free(i915_file_priv, sizeof(*i915_file_priv), DRM_MEM_FILES); ++} ++ + struct drm_ioctl_desc i915_ioctls[] = { + DRM_IOCTL_DEF(DRM_I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_I915_FLUSH, i915_flush_ioctl, DRM_AUTH), +@@ -870,6 +912,22 @@ struct drm_ioctl_desc i915_ioctls[] = { + DRM_IOCTL_DEF(DRM_I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH ), + DRM_IOCTL_DEF(DRM_I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), + DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), ++ DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), ++ DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_I915_GEM_CREATE, i915_gem_create_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0), ++ DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), + }; + + int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -542,11 +542,13 @@ static struct drm_driver driver = { + .driver_features = + DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | /* DRIVER_USE_MTRR |*/ + DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL | +- DRIVER_IRQ_VBL2, ++ DRIVER_IRQ_VBL2 | DRIVER_GEM, + .load = i915_driver_load, + .unload = i915_driver_unload, ++ .open = i915_driver_open, + .lastclose = i915_driver_lastclose, + .preclose = i915_driver_preclose, ++ .postclose = i915_driver_postclose, + .suspend = i915_suspend, + .resume = i915_resume, + .device_is_agp = i915_driver_device_is_agp, +@@ -559,6 +561,10 @@ static struct drm_driver driver = { + .reclaim_buffers = drm_core_reclaim_buffers, + .get_map_ofs = drm_core_get_map_ofs, + .get_reg_ofs = drm_core_get_reg_ofs, ++ .proc_init = i915_gem_proc_init, ++ .proc_cleanup = i915_gem_proc_cleanup, ++ .gem_init_object = i915_gem_init_object, ++ .gem_free_object = i915_gem_free_object, + .ioctls = i915_ioctls, + .fops = { + .owner = THIS_MODULE, +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -39,7 +39,7 @@ + + #define DRIVER_NAME "i915" + #define DRIVER_DESC "Intel Graphics" +-#define DRIVER_DATE "20060119" ++#define DRIVER_DATE "20080730" + + /* Interface history: + * +@@ -55,16 +55,23 @@ + #define DRIVER_MINOR 6 + #define DRIVER_PATCHLEVEL 0 + ++#define WATCH_COHERENCY 0 ++#define WATCH_BUF 0 ++#define WATCH_EXEC 0 ++#define WATCH_LRU 0 ++#define WATCH_RELOC 0 ++#define WATCH_INACTIVE 0 ++#define WATCH_PWRITE 0 ++ + typedef struct _drm_i915_ring_buffer { + int tail_mask; +- unsigned long Start; +- unsigned long End; + unsigned long Size; + u8 *virtual_start; + int head; + int tail; + int space; + drm_local_map_t map; ++ struct drm_gem_object *ring_obj; + } drm_i915_ring_buffer_t; + + struct mem_block { +@@ -83,6 +90,8 @@ typedef struct _drm_i915_vbl_swap { + } drm_i915_vbl_swap_t; + + typedef struct drm_i915_private { ++ struct drm_device *dev; ++ + drm_local_map_t *sarea; + drm_local_map_t *mmio_map; + +@@ -95,6 +104,7 @@ typedef struct drm_i915_private { + unsigned long counter; + unsigned int status_gfx_addr; + drm_local_map_t hws_map; ++ struct drm_gem_object *hws_obj; + + unsigned int cpp; + int back_offset; +@@ -104,7 +114,6 @@ typedef struct drm_i915_private { + + wait_queue_head_t irq_queue; + atomic_t irq_received; +- atomic_t irq_emitted; + /** Protects user_irq_refcount and irq_mask_reg */ + spinlock_t user_irq_lock; + /** Refcount for i915_user_irq_get() versus i915_user_irq_put(). */ +@@ -210,8 +219,174 @@ typedef struct drm_i915_private { + u8 saveDACMASK; + u8 saveDACDATA[256*3]; /* 256 3-byte colors */ + u8 saveCR[37]; ++ ++ struct { ++ struct drm_mm gtt_space; ++ ++ /** ++ * List of objects currently involved in rendering from the ++ * ringbuffer. ++ * ++ * A reference is held on the buffer while on this list. ++ */ ++ struct list_head active_list; ++ ++ /** ++ * List of objects which are not in the ringbuffer but which ++ * still have a write_domain which needs to be flushed before ++ * unbinding. ++ * ++ * A reference is held on the buffer while on this list. ++ */ ++ struct list_head flushing_list; ++ ++ /** ++ * LRU list of objects which are not in the ringbuffer and ++ * are ready to unbind, but are still in the GTT. ++ * ++ * A reference is not held on the buffer while on this list, ++ * as merely being GTT-bound shouldn't prevent its being ++ * freed, and we'll pull it off the list in the free path. ++ */ ++ struct list_head inactive_list; ++ ++ /** ++ * List of breadcrumbs associated with GPU requests currently ++ * outstanding. ++ */ ++ struct list_head request_list; ++ ++ /** ++ * We leave the user IRQ off as much as possible, ++ * but this means that requests will finish and never ++ * be retired once the system goes idle. Set a timer to ++ * fire periodically while the ring is running. When it ++ * fires, go retire requests. ++ */ ++ struct delayed_work retire_work; ++ ++ uint32_t next_gem_seqno; ++ ++ /** ++ * Waiting sequence number, if any ++ */ ++ uint32_t waiting_gem_seqno; ++ ++ /** ++ * Last seq seen at irq time ++ */ ++ uint32_t irq_gem_seqno; ++ ++ /** ++ * Flag if the X Server, and thus DRM, is not currently in ++ * control of the device. ++ * ++ * This is set between LeaveVT and EnterVT. It needs to be ++ * replaced with a semaphore. It also needs to be ++ * transitioned away from for kernel modesetting. ++ */ ++ int suspended; ++ ++ /** ++ * Flag if the hardware appears to be wedged. ++ * ++ * This is set when attempts to idle the device timeout. ++ * It prevents command submission from occuring and makes ++ * every pending request fail ++ */ ++ int wedged; ++ ++ /** Bit 6 swizzling required for X tiling */ ++ uint32_t bit_6_swizzle_x; ++ /** Bit 6 swizzling required for Y tiling */ ++ uint32_t bit_6_swizzle_y; ++ } mm; + } drm_i915_private_t; + ++/** driver private structure attached to each drm_gem_object */ ++struct drm_i915_gem_object { ++ struct drm_gem_object *obj; ++ ++ /** Current space allocated to this object in the GTT, if any. */ ++ struct drm_mm_node *gtt_space; ++ ++ /** This object's place on the active/flushing/inactive lists */ ++ struct list_head list; ++ ++ /** ++ * This is set if the object is on the active or flushing lists ++ * (has pending rendering), and is not set if it's on inactive (ready ++ * to be unbound). ++ */ ++ int active; ++ ++ /** ++ * This is set if the object has been written to since last bound ++ * to the GTT ++ */ ++ int dirty; ++ ++ /** AGP memory structure for our GTT binding. */ ++ DRM_AGP_MEM *agp_mem; ++ ++ struct page **page_list; ++ ++ /** ++ * Current offset of the object in GTT space. ++ * ++ * This is the same as gtt_space->start ++ */ ++ uint32_t gtt_offset; ++ ++ /** Boolean whether this object has a valid gtt offset. */ ++ int gtt_bound; ++ ++ /** How many users have pinned this object in GTT space */ ++ int pin_count; ++ ++ /** Breadcrumb of last rendering to the buffer. */ ++ uint32_t last_rendering_seqno; ++ ++ /** Current tiling mode for the object. */ ++ uint32_t tiling_mode; ++ ++ /** ++ * Flagging of which individual pages are valid in GEM_DOMAIN_CPU when ++ * GEM_DOMAIN_CPU is not in the object's read domain. ++ */ ++ uint8_t *page_cpu_valid; ++}; ++ ++/** ++ * Request queue structure. ++ * ++ * The request queue allows us to note sequence numbers that have been emitted ++ * and may be associated with active buffers to be retired. ++ * ++ * By keeping this list, we can avoid having to do questionable ++ * sequence-number comparisons on buffer last_rendering_seqnos, and associate ++ * an emission time with seqnos for tracking how far ahead of the GPU we are. ++ */ ++struct drm_i915_gem_request { ++ /** GEM sequence number associated with this request. */ ++ uint32_t seqno; ++ ++ /** Time at which this request was emitted, in jiffies. */ ++ unsigned long emitted_jiffies; ++ ++ /** Cache domains that were flushed at the start of the request. */ ++ uint32_t flush_domains; ++ ++ struct list_head list; ++}; ++ ++struct drm_i915_file_private { ++ struct { ++ uint32_t last_gem_seqno; ++ uint32_t last_gem_throttle_seqno; ++ } mm; ++}; ++ + extern struct drm_ioctl_desc i915_ioctls[]; + extern int i915_max_ioctl; + +@@ -219,18 +394,26 @@ extern int i915_max_ioctl; + extern void i915_kernel_lost_context(struct drm_device * dev); + extern int i915_driver_load(struct drm_device *, unsigned long flags); + extern int i915_driver_unload(struct drm_device *); ++extern int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv); + extern void i915_driver_lastclose(struct drm_device * dev); + extern void i915_driver_preclose(struct drm_device *dev, + struct drm_file *file_priv); ++extern void i915_driver_postclose(struct drm_device *dev, ++ struct drm_file *file_priv); + extern int i915_driver_device_is_agp(struct drm_device * dev); + extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg); ++extern int i915_emit_box(struct drm_device *dev, ++ struct drm_clip_rect __user *boxes, ++ int i, int DR1, int DR4); + + /* i915_irq.c */ + extern int i915_irq_emit(struct drm_device *dev, void *data, + struct drm_file *file_priv); + extern int i915_irq_wait(struct drm_device *dev, void *data, + struct drm_file *file_priv); ++void i915_user_irq_get(struct drm_device *dev); ++void i915_user_irq_put(struct drm_device *dev); + + extern int i915_driver_vblank_wait(struct drm_device *dev, unsigned int *sequence); + extern int i915_driver_vblank_wait2(struct drm_device *dev, unsigned int *sequence); +@@ -257,6 +440,67 @@ extern int i915_mem_destroy_heap(struct + extern void i915_mem_takedown(struct mem_block **heap); + extern void i915_mem_release(struct drm_device * dev, + struct drm_file *file_priv, struct mem_block *heap); ++/* i915_gem.c */ ++int i915_gem_init_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_create_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_pread_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_execbuffer(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_pin_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_busy_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_set_tiling(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int i915_gem_get_tiling(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++void i915_gem_load(struct drm_device *dev); ++int i915_gem_proc_init(struct drm_minor *minor); ++void i915_gem_proc_cleanup(struct drm_minor *minor); ++int i915_gem_init_object(struct drm_gem_object *obj); ++void i915_gem_free_object(struct drm_gem_object *obj); ++int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment); ++void i915_gem_object_unpin(struct drm_gem_object *obj); ++void i915_gem_lastclose(struct drm_device *dev); ++uint32_t i915_get_gem_seqno(struct drm_device *dev); ++void i915_gem_retire_requests(struct drm_device *dev); ++void i915_gem_retire_work_handler(struct work_struct *work); ++void i915_gem_clflush_object(struct drm_gem_object *obj); ++ ++/* i915_gem_tiling.c */ ++void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); ++ ++/* i915_gem_debug.c */ ++void i915_gem_dump_object(struct drm_gem_object *obj, int len, ++ const char *where, uint32_t mark); ++#if WATCH_INACTIVE ++void i915_verify_inactive(struct drm_device *dev, char *file, int line); ++#else ++#define i915_verify_inactive(dev, file, line) ++#endif ++void i915_gem_object_check_coherency(struct drm_gem_object *obj, int handle); ++void i915_gem_dump_object(struct drm_gem_object *obj, int len, ++ const char *where, uint32_t mark); ++void i915_dump_lru(struct drm_device *dev, const char *where); + + #define I915_READ(reg) DRM_READ32(dev_priv->mmio_map, (reg)) + #define I915_WRITE(reg,val) DRM_WRITE32(dev_priv->mmio_map, (reg), (val)) +@@ -309,6 +553,7 @@ extern void i915_mem_release(struct drm_ + */ + #define READ_HWSP(dev_priv, reg) (((volatile u32*)(dev_priv->hw_status_page))[reg]) + #define READ_BREADCRUMB(dev_priv) READ_HWSP(dev_priv, 5) ++#define I915_GEM_HWS_INDEX 0x10 + + extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); + +--- /dev/null ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -0,0 +1,2508 @@ ++/* ++ * Copyright © 2008 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Eric Anholt <eric@anholt.net> ++ * ++ */ ++ ++#include "drmP.h" ++#include "drm.h" ++#include "i915_drm.h" ++#include "i915_drv.h" ++#include <linux/swap.h> ++ ++static int ++i915_gem_object_set_domain(struct drm_gem_object *obj, ++ uint32_t read_domains, ++ uint32_t write_domain); ++static int ++i915_gem_object_set_domain_range(struct drm_gem_object *obj, ++ uint64_t offset, ++ uint64_t size, ++ uint32_t read_domains, ++ uint32_t write_domain); ++static int ++i915_gem_set_domain(struct drm_gem_object *obj, ++ struct drm_file *file_priv, ++ uint32_t read_domains, ++ uint32_t write_domain); ++static int i915_gem_object_get_page_list(struct drm_gem_object *obj); ++static void i915_gem_object_free_page_list(struct drm_gem_object *obj); ++static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); ++ ++int ++i915_gem_init_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_init *args = data; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ if (args->gtt_start >= args->gtt_end || ++ (args->gtt_start & (PAGE_SIZE - 1)) != 0 || ++ (args->gtt_end & (PAGE_SIZE - 1)) != 0) { ++ mutex_unlock(&dev->struct_mutex); ++ return -EINVAL; ++ } ++ ++ drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start, ++ args->gtt_end - args->gtt_start); ++ ++ dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start); ++ ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++ ++/** ++ * Creates a new mm object and returns a handle to it. ++ */ ++int ++i915_gem_create_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_create *args = data; ++ struct drm_gem_object *obj; ++ int handle, ret; ++ ++ args->size = roundup(args->size, PAGE_SIZE); ++ ++ /* Allocate the new object */ ++ obj = drm_gem_object_alloc(dev, args->size); ++ if (obj == NULL) ++ return -ENOMEM; ++ ++ ret = drm_gem_handle_create(file_priv, obj, &handle); ++ mutex_lock(&dev->struct_mutex); ++ drm_gem_object_handle_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ ++ if (ret) ++ return ret; ++ ++ args->handle = handle; ++ ++ return 0; ++} ++ ++/** ++ * Reads data from the object referenced by handle. ++ * ++ * On error, the contents of *data are undefined. ++ */ ++int ++i915_gem_pread_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_pread *args = data; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ssize_t read; ++ loff_t offset; ++ int ret; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EBADF; ++ obj_priv = obj->driver_private; ++ ++ /* Bounds check source. ++ * ++ * XXX: This could use review for overflow issues... ++ */ ++ if (args->offset > obj->size || args->size > obj->size || ++ args->offset + args->size > obj->size) { ++ drm_gem_object_unreference(obj); ++ return -EINVAL; ++ } ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ ret = i915_gem_object_set_domain_range(obj, args->offset, args->size, ++ I915_GEM_DOMAIN_CPU, 0); ++ if (ret != 0) { ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ } ++ ++ offset = args->offset; ++ ++ read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr, ++ args->size, &offset); ++ if (read != args->size) { ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ if (read < 0) ++ return read; ++ else ++ return -EINVAL; ++ } ++ ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++static int ++i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, ++ struct drm_i915_gem_pwrite *args, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ssize_t remain; ++ loff_t offset; ++ char __user *user_data; ++ char *vaddr; ++ int i, o, l; ++ int ret = 0; ++ unsigned long pfn; ++ unsigned long unwritten; ++ ++ user_data = (char __user *) (uintptr_t) args->data_ptr; ++ remain = args->size; ++ if (!access_ok(VERIFY_READ, user_data, remain)) ++ return -EFAULT; ++ ++ ++ mutex_lock(&dev->struct_mutex); ++ ret = i915_gem_object_pin(obj, 0); ++ if (ret) { ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++ } ++ ret = i915_gem_set_domain(obj, file_priv, ++ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); ++ if (ret) ++ goto fail; ++ ++ obj_priv = obj->driver_private; ++ offset = obj_priv->gtt_offset + args->offset; ++ obj_priv->dirty = 1; ++ ++ while (remain > 0) { ++ /* Operation in this page ++ * ++ * i = page number ++ * o = offset within page ++ * l = bytes to copy ++ */ ++ i = offset >> PAGE_SHIFT; ++ o = offset & (PAGE_SIZE-1); ++ l = remain; ++ if ((o + l) > PAGE_SIZE) ++ l = PAGE_SIZE - o; ++ ++ pfn = (dev->agp->base >> PAGE_SHIFT) + i; ++ ++#ifdef DRM_KMAP_ATOMIC_PROT_PFN ++ /* kmap_atomic can't map IO pages on non-HIGHMEM kernels ++ */ ++ vaddr = kmap_atomic_prot_pfn(pfn, KM_USER0, ++ __pgprot(__PAGE_KERNEL)); ++#if WATCH_PWRITE ++ DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", ++ i, o, l, pfn, vaddr); ++#endif ++ unwritten = __copy_from_user_inatomic_nocache(vaddr + o, ++ user_data, l); ++ kunmap_atomic(vaddr, KM_USER0); ++ ++ if (unwritten) ++#endif ++ { ++ vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); ++#if WATCH_PWRITE ++ DRM_INFO("pwrite slow i %d o %d l %d " ++ "pfn %ld vaddr %p\n", ++ i, o, l, pfn, vaddr); ++#endif ++ if (vaddr == NULL) { ++ ret = -EFAULT; ++ goto fail; ++ } ++ unwritten = __copy_from_user(vaddr + o, user_data, l); ++#if WATCH_PWRITE ++ DRM_INFO("unwritten %ld\n", unwritten); ++#endif ++ iounmap(vaddr); ++ if (unwritten) { ++ ret = -EFAULT; ++ goto fail; ++ } ++ } ++ ++ remain -= l; ++ user_data += l; ++ offset += l; ++ } ++#if WATCH_PWRITE && 1 ++ i915_gem_clflush_object(obj); ++ i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0); ++ i915_gem_clflush_object(obj); ++#endif ++ ++fail: ++ i915_gem_object_unpin(obj); ++ mutex_unlock(&dev->struct_mutex); ++ ++ return ret; ++} ++ ++int ++i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, ++ struct drm_i915_gem_pwrite *args, ++ struct drm_file *file_priv) ++{ ++ int ret; ++ loff_t offset; ++ ssize_t written; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ ret = i915_gem_set_domain(obj, file_priv, ++ I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); ++ if (ret) { ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++ } ++ ++ offset = args->offset; ++ ++ written = vfs_write(obj->filp, ++ (char __user *)(uintptr_t) args->data_ptr, ++ args->size, &offset); ++ if (written != args->size) { ++ mutex_unlock(&dev->struct_mutex); ++ if (written < 0) ++ return written; ++ else ++ return -EINVAL; ++ } ++ ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++/** ++ * Writes data to the object referenced by handle. ++ * ++ * On error, the contents of the buffer that were to be modified are undefined. ++ */ ++int ++i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_pwrite *args = data; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret = 0; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EBADF; ++ obj_priv = obj->driver_private; ++ ++ /* Bounds check destination. ++ * ++ * XXX: This could use review for overflow issues... ++ */ ++ if (args->offset > obj->size || args->size > obj->size || ++ args->offset + args->size > obj->size) { ++ drm_gem_object_unreference(obj); ++ return -EINVAL; ++ } ++ ++ /* We can only do the GTT pwrite on untiled buffers, as otherwise ++ * it would end up going through the fenced access, and we'll get ++ * different detiling behavior between reading and writing. ++ * pread/pwrite currently are reading and writing from the CPU ++ * perspective, requiring manual detiling by the client. ++ */ ++ if (obj_priv->tiling_mode == I915_TILING_NONE && ++ dev->gtt_total != 0) ++ ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv); ++ else ++ ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv); ++ ++#if WATCH_PWRITE ++ if (ret) ++ DRM_INFO("pwrite failed %d\n", ret); ++#endif ++ ++ drm_gem_object_unreference(obj); ++ ++ return ret; ++} ++ ++/** ++ * Called when user space prepares to use an object ++ */ ++int ++i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_set_domain *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EBADF; ++ ++ mutex_lock(&dev->struct_mutex); ++#if WATCH_BUF ++ DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n", ++ obj, obj->size, args->read_domains, args->write_domain); ++#endif ++ ret = i915_gem_set_domain(obj, file_priv, ++ args->read_domains, args->write_domain); ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++} ++ ++/** ++ * Called when user space has done writes to this buffer ++ */ ++int ++i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_sw_finish *args = data; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret = 0; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ mutex_lock(&dev->struct_mutex); ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) { ++ mutex_unlock(&dev->struct_mutex); ++ return -EBADF; ++ } ++ ++#if WATCH_BUF ++ DRM_INFO("%s: sw_finish %d (%p %d)\n", ++ __func__, args->handle, obj, obj->size); ++#endif ++ obj_priv = obj->driver_private; ++ ++ /* Pinned buffers may be scanout, so flush the cache */ ++ if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) { ++ i915_gem_clflush_object(obj); ++ drm_agp_chipset_flush(dev); ++ } ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++} ++ ++/** ++ * Maps the contents of an object, returning the address it is mapped ++ * into. ++ * ++ * While the mapping holds a reference on the contents of the object, it doesn't ++ * imply a ref on the object itself. ++ */ ++int ++i915_gem_mmap_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_mmap *args = data; ++ struct drm_gem_object *obj; ++ loff_t offset; ++ unsigned long addr; ++ ++ if (!(dev->driver->driver_features & DRIVER_GEM)) ++ return -ENODEV; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EBADF; ++ ++ offset = args->offset; ++ ++ down_write(¤t->mm->mmap_sem); ++ addr = do_mmap(obj->filp, 0, args->size, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ args->offset); ++ up_write(¤t->mm->mmap_sem); ++ mutex_lock(&dev->struct_mutex); ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ if (IS_ERR((void *)addr)) ++ return addr; ++ ++ args->addr_ptr = (uint64_t) addr; ++ ++ return 0; ++} ++ ++static void ++i915_gem_object_free_page_list(struct drm_gem_object *obj) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int page_count = obj->size / PAGE_SIZE; ++ int i; ++ ++ if (obj_priv->page_list == NULL) ++ return; ++ ++ ++ for (i = 0; i < page_count; i++) ++ if (obj_priv->page_list[i] != NULL) { ++ if (obj_priv->dirty) ++ set_page_dirty(obj_priv->page_list[i]); ++ mark_page_accessed(obj_priv->page_list[i]); ++ page_cache_release(obj_priv->page_list[i]); ++ } ++ obj_priv->dirty = 0; ++ ++ drm_free(obj_priv->page_list, ++ page_count * sizeof(struct page *), ++ DRM_MEM_DRIVER); ++ obj_priv->page_list = NULL; ++} ++ ++static void ++i915_gem_object_move_to_active(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ /* Add a reference if we're newly entering the active list. */ ++ if (!obj_priv->active) { ++ drm_gem_object_reference(obj); ++ obj_priv->active = 1; ++ } ++ /* Move from whatever list we were on to the tail of execution. */ ++ list_move_tail(&obj_priv->list, ++ &dev_priv->mm.active_list); ++} ++ ++ ++static void ++i915_gem_object_move_to_inactive(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ if (obj_priv->pin_count != 0) ++ list_del_init(&obj_priv->list); ++ else ++ list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); ++ ++ if (obj_priv->active) { ++ obj_priv->active = 0; ++ drm_gem_object_unreference(obj); ++ } ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++} ++ ++/** ++ * Creates a new sequence number, emitting a write of it to the status page ++ * plus an interrupt, which will trigger i915_user_interrupt_handler. ++ * ++ * Must be called with struct_lock held. ++ * ++ * Returned sequence numbers are nonzero on success. ++ */ ++static uint32_t ++i915_add_request(struct drm_device *dev, uint32_t flush_domains) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_request *request; ++ uint32_t seqno; ++ int was_empty; ++ RING_LOCALS; ++ ++ request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); ++ if (request == NULL) ++ return 0; ++ ++ /* Grab the seqno we're going to make this request be, and bump the ++ * next (skipping 0 so it can be the reserved no-seqno value). ++ */ ++ seqno = dev_priv->mm.next_gem_seqno; ++ dev_priv->mm.next_gem_seqno++; ++ if (dev_priv->mm.next_gem_seqno == 0) ++ dev_priv->mm.next_gem_seqno++; ++ ++ BEGIN_LP_RING(4); ++ OUT_RING(MI_STORE_DWORD_INDEX); ++ OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); ++ OUT_RING(seqno); ++ ++ OUT_RING(MI_USER_INTERRUPT); ++ ADVANCE_LP_RING(); ++ ++ DRM_DEBUG("%d\n", seqno); ++ ++ request->seqno = seqno; ++ request->emitted_jiffies = jiffies; ++ request->flush_domains = flush_domains; ++ was_empty = list_empty(&dev_priv->mm.request_list); ++ list_add_tail(&request->list, &dev_priv->mm.request_list); ++ ++ if (was_empty) ++ schedule_delayed_work(&dev_priv->mm.retire_work, HZ); ++ return seqno; ++} ++ ++/** ++ * Command execution barrier ++ * ++ * Ensures that all commands in the ring are finished ++ * before signalling the CPU ++ */ ++uint32_t ++i915_retire_commands(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; ++ uint32_t flush_domains = 0; ++ RING_LOCALS; ++ ++ /* The sampler always gets flushed on i965 (sigh) */ ++ if (IS_I965G(dev)) ++ flush_domains |= I915_GEM_DOMAIN_SAMPLER; ++ BEGIN_LP_RING(2); ++ OUT_RING(cmd); ++ OUT_RING(0); /* noop */ ++ ADVANCE_LP_RING(); ++ return flush_domains; ++} ++ ++/** ++ * Moves buffers associated only with the given active seqno from the active ++ * to inactive list, potentially freeing them. ++ */ ++static void ++i915_gem_retire_request(struct drm_device *dev, ++ struct drm_i915_gem_request *request) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ ++ /* Move any buffers on the active list that are no longer referenced ++ * by the ringbuffer to the flushing/inactive lists as appropriate. ++ */ ++ while (!list_empty(&dev_priv->mm.active_list)) { ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ++ obj_priv = list_first_entry(&dev_priv->mm.active_list, ++ struct drm_i915_gem_object, ++ list); ++ obj = obj_priv->obj; ++ ++ /* If the seqno being retired doesn't match the oldest in the ++ * list, then the oldest in the list must still be newer than ++ * this seqno. ++ */ ++ if (obj_priv->last_rendering_seqno != request->seqno) ++ return; ++#if WATCH_LRU ++ DRM_INFO("%s: retire %d moves to inactive list %p\n", ++ __func__, request->seqno, obj); ++#endif ++ ++ if (obj->write_domain != 0) { ++ list_move_tail(&obj_priv->list, ++ &dev_priv->mm.flushing_list); ++ } else { ++ i915_gem_object_move_to_inactive(obj); ++ } ++ } ++ ++ if (request->flush_domains != 0) { ++ struct drm_i915_gem_object *obj_priv, *next; ++ ++ /* Clear the write domain and activity from any buffers ++ * that are just waiting for a flush matching the one retired. ++ */ ++ list_for_each_entry_safe(obj_priv, next, ++ &dev_priv->mm.flushing_list, list) { ++ struct drm_gem_object *obj = obj_priv->obj; ++ ++ if (obj->write_domain & request->flush_domains) { ++ obj->write_domain = 0; ++ i915_gem_object_move_to_inactive(obj); ++ } ++ } ++ ++ } ++} ++ ++/** ++ * Returns true if seq1 is later than seq2. ++ */ ++static int ++i915_seqno_passed(uint32_t seq1, uint32_t seq2) ++{ ++ return (int32_t)(seq1 - seq2) >= 0; ++} ++ ++uint32_t ++i915_get_gem_seqno(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ ++ return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); ++} ++ ++/** ++ * This function clears the request list as sequence numbers are passed. ++ */ ++void ++i915_gem_retire_requests(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ uint32_t seqno; ++ ++ seqno = i915_get_gem_seqno(dev); ++ ++ while (!list_empty(&dev_priv->mm.request_list)) { ++ struct drm_i915_gem_request *request; ++ uint32_t retiring_seqno; ++ ++ request = list_first_entry(&dev_priv->mm.request_list, ++ struct drm_i915_gem_request, ++ list); ++ retiring_seqno = request->seqno; ++ ++ if (i915_seqno_passed(seqno, retiring_seqno) || ++ dev_priv->mm.wedged) { ++ i915_gem_retire_request(dev, request); ++ ++ list_del(&request->list); ++ drm_free(request, sizeof(*request), DRM_MEM_DRIVER); ++ } else ++ break; ++ } ++} ++ ++void ++i915_gem_retire_work_handler(struct work_struct *work) ++{ ++ drm_i915_private_t *dev_priv; ++ struct drm_device *dev; ++ ++ dev_priv = container_of(work, drm_i915_private_t, ++ mm.retire_work.work); ++ dev = dev_priv->dev; ++ ++ mutex_lock(&dev->struct_mutex); ++ i915_gem_retire_requests(dev); ++ if (!list_empty(&dev_priv->mm.request_list)) ++ schedule_delayed_work(&dev_priv->mm.retire_work, HZ); ++ mutex_unlock(&dev->struct_mutex); ++} ++ ++/** ++ * Waits for a sequence number to be signaled, and cleans up the ++ * request and object lists appropriately for that event. ++ */ ++int ++i915_wait_request(struct drm_device *dev, uint32_t seqno) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ int ret = 0; ++ ++ BUG_ON(seqno == 0); ++ ++ if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { ++ dev_priv->mm.waiting_gem_seqno = seqno; ++ i915_user_irq_get(dev); ++ ret = wait_event_interruptible(dev_priv->irq_queue, ++ i915_seqno_passed(i915_get_gem_seqno(dev), ++ seqno) || ++ dev_priv->mm.wedged); ++ i915_user_irq_put(dev); ++ dev_priv->mm.waiting_gem_seqno = 0; ++ } ++ if (dev_priv->mm.wedged) ++ ret = -EIO; ++ ++ if (ret && ret != -ERESTARTSYS) ++ DRM_ERROR("%s returns %d (awaiting %d at %d)\n", ++ __func__, ret, seqno, i915_get_gem_seqno(dev)); ++ ++ /* Directly dispatch request retiring. While we have the work queue ++ * to handle this, the waiter on a request often wants an associated ++ * buffer to have made it to the inactive list, and we would need ++ * a separate wait queue to handle that. ++ */ ++ if (ret == 0) ++ i915_gem_retire_requests(dev); ++ ++ return ret; ++} ++ ++static void ++i915_gem_flush(struct drm_device *dev, ++ uint32_t invalidate_domains, ++ uint32_t flush_domains) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ uint32_t cmd; ++ RING_LOCALS; ++ ++#if WATCH_EXEC ++ DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, ++ invalidate_domains, flush_domains); ++#endif ++ ++ if (flush_domains & I915_GEM_DOMAIN_CPU) ++ drm_agp_chipset_flush(dev); ++ ++ if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT)) { ++ /* ++ * read/write caches: ++ * ++ * I915_GEM_DOMAIN_RENDER is always invalidated, but is ++ * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is ++ * also flushed at 2d versus 3d pipeline switches. ++ * ++ * read-only caches: ++ * ++ * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if ++ * MI_READ_FLUSH is set, and is always flushed on 965. ++ * ++ * I915_GEM_DOMAIN_COMMAND may not exist? ++ * ++ * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is ++ * invalidated when MI_EXE_FLUSH is set. ++ * ++ * I915_GEM_DOMAIN_VERTEX, which exists on 965, is ++ * invalidated with every MI_FLUSH. ++ * ++ * TLBs: ++ * ++ * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND ++ * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and ++ * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER ++ * are flushed at any MI_FLUSH. ++ */ ++ ++ cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; ++ if ((invalidate_domains|flush_domains) & ++ I915_GEM_DOMAIN_RENDER) ++ cmd &= ~MI_NO_WRITE_FLUSH; ++ if (!IS_I965G(dev)) { ++ /* ++ * On the 965, the sampler cache always gets flushed ++ * and this bit is reserved. ++ */ ++ if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) ++ cmd |= MI_READ_FLUSH; ++ } ++ if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) ++ cmd |= MI_EXE_FLUSH; ++ ++#if WATCH_EXEC ++ DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); ++#endif ++ BEGIN_LP_RING(2); ++ OUT_RING(cmd); ++ OUT_RING(0); /* noop */ ++ ADVANCE_LP_RING(); ++ } ++} ++ ++/** ++ * Ensures that all rendering to the object has completed and the object is ++ * safe to unbind from the GTT or access from the CPU. ++ */ ++static int ++i915_gem_object_wait_rendering(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int ret; ++ ++ /* If there are writes queued to the buffer, flush and ++ * create a new seqno to wait for. ++ */ ++ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) { ++ uint32_t write_domain = obj->write_domain; ++#if WATCH_BUF ++ DRM_INFO("%s: flushing object %p from write domain %08x\n", ++ __func__, obj, write_domain); ++#endif ++ i915_gem_flush(dev, 0, write_domain); ++ ++ i915_gem_object_move_to_active(obj); ++ obj_priv->last_rendering_seqno = i915_add_request(dev, ++ write_domain); ++ BUG_ON(obj_priv->last_rendering_seqno == 0); ++#if WATCH_LRU ++ DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj); ++#endif ++ } ++ ++ /* If there is rendering queued on the buffer being evicted, wait for ++ * it. ++ */ ++ if (obj_priv->active) { ++#if WATCH_BUF ++ DRM_INFO("%s: object %p wait for seqno %08x\n", ++ __func__, obj, obj_priv->last_rendering_seqno); ++#endif ++ ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); ++ if (ret != 0) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * Unbinds an object from the GTT aperture. ++ */ ++static int ++i915_gem_object_unbind(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int ret = 0; ++ ++#if WATCH_BUF ++ DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); ++ DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); ++#endif ++ if (obj_priv->gtt_space == NULL) ++ return 0; ++ ++ if (obj_priv->pin_count != 0) { ++ DRM_ERROR("Attempting to unbind pinned buffer\n"); ++ return -EINVAL; ++ } ++ ++ /* Wait for any rendering to complete ++ */ ++ ret = i915_gem_object_wait_rendering(obj); ++ if (ret) { ++ DRM_ERROR("wait_rendering failed: %d\n", ret); ++ return ret; ++ } ++ ++ /* Move the object to the CPU domain to ensure that ++ * any possible CPU writes while it's not in the GTT ++ * are flushed when we go to remap it. This will ++ * also ensure that all pending GPU writes are finished ++ * before we unbind. ++ */ ++ ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU, ++ I915_GEM_DOMAIN_CPU); ++ if (ret) { ++ DRM_ERROR("set_domain failed: %d\n", ret); ++ return ret; ++ } ++ ++ if (obj_priv->agp_mem != NULL) { ++ drm_unbind_agp(obj_priv->agp_mem); ++ drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); ++ obj_priv->agp_mem = NULL; ++ } ++ ++ BUG_ON(obj_priv->active); ++ ++ i915_gem_object_free_page_list(obj); ++ ++ if (obj_priv->gtt_space) { ++ atomic_dec(&dev->gtt_count); ++ atomic_sub(obj->size, &dev->gtt_memory); ++ ++ drm_mm_put_block(obj_priv->gtt_space); ++ obj_priv->gtt_space = NULL; ++ } ++ ++ /* Remove ourselves from the LRU list if present. */ ++ if (!list_empty(&obj_priv->list)) ++ list_del_init(&obj_priv->list); ++ ++ return 0; ++} ++ ++static int ++i915_gem_evict_something(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret = 0; ++ ++ for (;;) { ++ /* If there's an inactive buffer available now, grab it ++ * and be done. ++ */ ++ if (!list_empty(&dev_priv->mm.inactive_list)) { ++ obj_priv = list_first_entry(&dev_priv->mm.inactive_list, ++ struct drm_i915_gem_object, ++ list); ++ obj = obj_priv->obj; ++ BUG_ON(obj_priv->pin_count != 0); ++#if WATCH_LRU ++ DRM_INFO("%s: evicting %p\n", __func__, obj); ++#endif ++ BUG_ON(obj_priv->active); ++ ++ /* Wait on the rendering and unbind the buffer. */ ++ ret = i915_gem_object_unbind(obj); ++ break; ++ } ++ ++ /* If we didn't get anything, but the ring is still processing ++ * things, wait for one of those things to finish and hopefully ++ * leave us a buffer to evict. ++ */ ++ if (!list_empty(&dev_priv->mm.request_list)) { ++ struct drm_i915_gem_request *request; ++ ++ request = list_first_entry(&dev_priv->mm.request_list, ++ struct drm_i915_gem_request, ++ list); ++ ++ ret = i915_wait_request(dev, request->seqno); ++ if (ret) ++ break; ++ ++ /* if waiting caused an object to become inactive, ++ * then loop around and wait for it. Otherwise, we ++ * assume that waiting freed and unbound something, ++ * so there should now be some space in the GTT ++ */ ++ if (!list_empty(&dev_priv->mm.inactive_list)) ++ continue; ++ break; ++ } ++ ++ /* If we didn't have anything on the request list but there ++ * are buffers awaiting a flush, emit one and try again. ++ * When we wait on it, those buffers waiting for that flush ++ * will get moved to inactive. ++ */ ++ if (!list_empty(&dev_priv->mm.flushing_list)) { ++ obj_priv = list_first_entry(&dev_priv->mm.flushing_list, ++ struct drm_i915_gem_object, ++ list); ++ obj = obj_priv->obj; ++ ++ i915_gem_flush(dev, ++ obj->write_domain, ++ obj->write_domain); ++ i915_add_request(dev, obj->write_domain); ++ ++ obj = NULL; ++ continue; ++ } ++ ++ DRM_ERROR("inactive empty %d request empty %d " ++ "flushing empty %d\n", ++ list_empty(&dev_priv->mm.inactive_list), ++ list_empty(&dev_priv->mm.request_list), ++ list_empty(&dev_priv->mm.flushing_list)); ++ /* If we didn't do any of the above, there's nothing to be done ++ * and we just can't fit it in. ++ */ ++ return -ENOMEM; ++ } ++ return ret; ++} ++ ++static int ++i915_gem_object_get_page_list(struct drm_gem_object *obj) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int page_count, i; ++ struct address_space *mapping; ++ struct inode *inode; ++ struct page *page; ++ int ret; ++ ++ if (obj_priv->page_list) ++ return 0; ++ ++ /* Get the list of pages out of our struct file. They'll be pinned ++ * at this point until we release them. ++ */ ++ page_count = obj->size / PAGE_SIZE; ++ BUG_ON(obj_priv->page_list != NULL); ++ obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *), ++ DRM_MEM_DRIVER); ++ if (obj_priv->page_list == NULL) { ++ DRM_ERROR("Faled to allocate page list\n"); ++ return -ENOMEM; ++ } ++ ++ inode = obj->filp->f_path.dentry->d_inode; ++ mapping = inode->i_mapping; ++ for (i = 0; i < page_count; i++) { ++ page = read_mapping_page(mapping, i, NULL); ++ if (IS_ERR(page)) { ++ ret = PTR_ERR(page); ++ DRM_ERROR("read_mapping_page failed: %d\n", ret); ++ i915_gem_object_free_page_list(obj); ++ return ret; ++ } ++ obj_priv->page_list[i] = page; ++ } ++ return 0; ++} ++ ++/** ++ * Finds free space in the GTT aperture and binds the object there. ++ */ ++static int ++i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) ++{ ++ struct drm_device *dev = obj->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ struct drm_mm_node *free_space; ++ int page_count, ret; ++ ++ if (alignment == 0) ++ alignment = PAGE_SIZE; ++ if (alignment & (PAGE_SIZE - 1)) { ++ DRM_ERROR("Invalid object alignment requested %u\n", alignment); ++ return -EINVAL; ++ } ++ ++ search_free: ++ free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, ++ obj->size, alignment, 0); ++ if (free_space != NULL) { ++ obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, ++ alignment); ++ if (obj_priv->gtt_space != NULL) { ++ obj_priv->gtt_space->private = obj; ++ obj_priv->gtt_offset = obj_priv->gtt_space->start; ++ } ++ } ++ if (obj_priv->gtt_space == NULL) { ++ /* If the gtt is empty and we're still having trouble ++ * fitting our object in, we're out of memory. ++ */ ++#if WATCH_LRU ++ DRM_INFO("%s: GTT full, evicting something\n", __func__); ++#endif ++ if (list_empty(&dev_priv->mm.inactive_list) && ++ list_empty(&dev_priv->mm.flushing_list) && ++ list_empty(&dev_priv->mm.active_list)) { ++ DRM_ERROR("GTT full, but LRU list empty\n"); ++ return -ENOMEM; ++ } ++ ++ ret = i915_gem_evict_something(dev); ++ if (ret != 0) { ++ DRM_ERROR("Failed to evict a buffer %d\n", ret); ++ return ret; ++ } ++ goto search_free; ++ } ++ ++#if WATCH_BUF ++ DRM_INFO("Binding object of size %d at 0x%08x\n", ++ obj->size, obj_priv->gtt_offset); ++#endif ++ ret = i915_gem_object_get_page_list(obj); ++ if (ret) { ++ drm_mm_put_block(obj_priv->gtt_space); ++ obj_priv->gtt_space = NULL; ++ return ret; ++ } ++ ++ page_count = obj->size / PAGE_SIZE; ++ /* Create an AGP memory structure pointing at our pages, and bind it ++ * into the GTT. ++ */ ++ obj_priv->agp_mem = drm_agp_bind_pages(dev, ++ obj_priv->page_list, ++ page_count, ++ obj_priv->gtt_offset); ++ if (obj_priv->agp_mem == NULL) { ++ i915_gem_object_free_page_list(obj); ++ drm_mm_put_block(obj_priv->gtt_space); ++ obj_priv->gtt_space = NULL; ++ return -ENOMEM; ++ } ++ atomic_inc(&dev->gtt_count); ++ atomic_add(obj->size, &dev->gtt_memory); ++ ++ /* Assert that the object is not currently in any GPU domain. As it ++ * wasn't in the GTT, there shouldn't be any way it could have been in ++ * a GPU cache ++ */ ++ BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); ++ BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); ++ ++ return 0; ++} ++ ++void ++i915_gem_clflush_object(struct drm_gem_object *obj) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ /* If we don't have a page list set up, then we're not pinned ++ * to GPU, and we can ignore the cache flush because it'll happen ++ * again at bind time. ++ */ ++ if (obj_priv->page_list == NULL) ++ return; ++ ++ drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE); ++} ++ ++/* ++ * Set the next domain for the specified object. This ++ * may not actually perform the necessary flushing/invaliding though, ++ * as that may want to be batched with other set_domain operations ++ * ++ * This is (we hope) the only really tricky part of gem. The goal ++ * is fairly simple -- track which caches hold bits of the object ++ * and make sure they remain coherent. A few concrete examples may ++ * help to explain how it works. For shorthand, we use the notation ++ * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the ++ * a pair of read and write domain masks. ++ * ++ * Case 1: the batch buffer ++ * ++ * 1. Allocated ++ * 2. Written by CPU ++ * 3. Mapped to GTT ++ * 4. Read by GPU ++ * 5. Unmapped from GTT ++ * 6. Freed ++ * ++ * Let's take these a step at a time ++ * ++ * 1. Allocated ++ * Pages allocated from the kernel may still have ++ * cache contents, so we set them to (CPU, CPU) always. ++ * 2. Written by CPU (using pwrite) ++ * The pwrite function calls set_domain (CPU, CPU) and ++ * this function does nothing (as nothing changes) ++ * 3. Mapped by GTT ++ * This function asserts that the object is not ++ * currently in any GPU-based read or write domains ++ * 4. Read by GPU ++ * i915_gem_execbuffer calls set_domain (COMMAND, 0). ++ * As write_domain is zero, this function adds in the ++ * current read domains (CPU+COMMAND, 0). ++ * flush_domains is set to CPU. ++ * invalidate_domains is set to COMMAND ++ * clflush is run to get data out of the CPU caches ++ * then i915_dev_set_domain calls i915_gem_flush to ++ * emit an MI_FLUSH and drm_agp_chipset_flush ++ * 5. Unmapped from GTT ++ * i915_gem_object_unbind calls set_domain (CPU, CPU) ++ * flush_domains and invalidate_domains end up both zero ++ * so no flushing/invalidating happens ++ * 6. Freed ++ * yay, done ++ * ++ * Case 2: The shared render buffer ++ * ++ * 1. Allocated ++ * 2. Mapped to GTT ++ * 3. Read/written by GPU ++ * 4. set_domain to (CPU,CPU) ++ * 5. Read/written by CPU ++ * 6. Read/written by GPU ++ * ++ * 1. Allocated ++ * Same as last example, (CPU, CPU) ++ * 2. Mapped to GTT ++ * Nothing changes (assertions find that it is not in the GPU) ++ * 3. Read/written by GPU ++ * execbuffer calls set_domain (RENDER, RENDER) ++ * flush_domains gets CPU ++ * invalidate_domains gets GPU ++ * clflush (obj) ++ * MI_FLUSH and drm_agp_chipset_flush ++ * 4. set_domain (CPU, CPU) ++ * flush_domains gets GPU ++ * invalidate_domains gets CPU ++ * wait_rendering (obj) to make sure all drawing is complete. ++ * This will include an MI_FLUSH to get the data from GPU ++ * to memory ++ * clflush (obj) to invalidate the CPU cache ++ * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) ++ * 5. Read/written by CPU ++ * cache lines are loaded and dirtied ++ * 6. Read written by GPU ++ * Same as last GPU access ++ * ++ * Case 3: The constant buffer ++ * ++ * 1. Allocated ++ * 2. Written by CPU ++ * 3. Read by GPU ++ * 4. Updated (written) by CPU again ++ * 5. Read by GPU ++ * ++ * 1. Allocated ++ * (CPU, CPU) ++ * 2. Written by CPU ++ * (CPU, CPU) ++ * 3. Read by GPU ++ * (CPU+RENDER, 0) ++ * flush_domains = CPU ++ * invalidate_domains = RENDER ++ * clflush (obj) ++ * MI_FLUSH ++ * drm_agp_chipset_flush ++ * 4. Updated (written) by CPU again ++ * (CPU, CPU) ++ * flush_domains = 0 (no previous write domain) ++ * invalidate_domains = 0 (no new read domains) ++ * 5. Read by GPU ++ * (CPU+RENDER, 0) ++ * flush_domains = CPU ++ * invalidate_domains = RENDER ++ * clflush (obj) ++ * MI_FLUSH ++ * drm_agp_chipset_flush ++ */ ++static int ++i915_gem_object_set_domain(struct drm_gem_object *obj, ++ uint32_t read_domains, ++ uint32_t write_domain) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ uint32_t invalidate_domains = 0; ++ uint32_t flush_domains = 0; ++ int ret; ++ ++#if WATCH_BUF ++ DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", ++ __func__, obj, ++ obj->read_domains, read_domains, ++ obj->write_domain, write_domain); ++#endif ++ /* ++ * If the object isn't moving to a new write domain, ++ * let the object stay in multiple read domains ++ */ ++ if (write_domain == 0) ++ read_domains |= obj->read_domains; ++ else ++ obj_priv->dirty = 1; ++ ++ /* ++ * Flush the current write domain if ++ * the new read domains don't match. Invalidate ++ * any read domains which differ from the old ++ * write domain ++ */ ++ if (obj->write_domain && obj->write_domain != read_domains) { ++ flush_domains |= obj->write_domain; ++ invalidate_domains |= read_domains & ~obj->write_domain; ++ } ++ /* ++ * Invalidate any read caches which may have ++ * stale data. That is, any new read domains. ++ */ ++ invalidate_domains |= read_domains & ~obj->read_domains; ++ if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { ++#if WATCH_BUF ++ DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", ++ __func__, flush_domains, invalidate_domains); ++#endif ++ /* ++ * If we're invaliding the CPU cache and flushing a GPU cache, ++ * then pause for rendering so that the GPU caches will be ++ * flushed before the cpu cache is invalidated ++ */ ++ if ((invalidate_domains & I915_GEM_DOMAIN_CPU) && ++ (flush_domains & ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT))) { ++ ret = i915_gem_object_wait_rendering(obj); ++ if (ret) ++ return ret; ++ } ++ i915_gem_clflush_object(obj); ++ } ++ ++ if ((write_domain | flush_domains) != 0) ++ obj->write_domain = write_domain; ++ ++ /* If we're invalidating the CPU domain, clear the per-page CPU ++ * domain list as well. ++ */ ++ if (obj_priv->page_cpu_valid != NULL && ++ (obj->read_domains & I915_GEM_DOMAIN_CPU) && ++ ((read_domains & I915_GEM_DOMAIN_CPU) == 0)) { ++ memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); ++ } ++ obj->read_domains = read_domains; ++ ++ dev->invalidate_domains |= invalidate_domains; ++ dev->flush_domains |= flush_domains; ++#if WATCH_BUF ++ DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", ++ __func__, ++ obj->read_domains, obj->write_domain, ++ dev->invalidate_domains, dev->flush_domains); ++#endif ++ return 0; ++} ++ ++/** ++ * Set the read/write domain on a range of the object. ++ * ++ * Currently only implemented for CPU reads, otherwise drops to normal ++ * i915_gem_object_set_domain(). ++ */ ++static int ++i915_gem_object_set_domain_range(struct drm_gem_object *obj, ++ uint64_t offset, ++ uint64_t size, ++ uint32_t read_domains, ++ uint32_t write_domain) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int ret, i; ++ ++ if (obj->read_domains & I915_GEM_DOMAIN_CPU) ++ return 0; ++ ++ if (read_domains != I915_GEM_DOMAIN_CPU || ++ write_domain != 0) ++ return i915_gem_object_set_domain(obj, ++ read_domains, write_domain); ++ ++ /* Wait on any GPU rendering to the object to be flushed. */ ++ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) { ++ ret = i915_gem_object_wait_rendering(obj); ++ if (ret) ++ return ret; ++ } ++ ++ if (obj_priv->page_cpu_valid == NULL) { ++ obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE, ++ DRM_MEM_DRIVER); ++ } ++ ++ /* Flush the cache on any pages that are still invalid from the CPU's ++ * perspective. ++ */ ++ for (i = offset / PAGE_SIZE; i < (offset + size - 1) / PAGE_SIZE; i++) { ++ if (obj_priv->page_cpu_valid[i]) ++ continue; ++ ++ drm_clflush_pages(obj_priv->page_list + i, 1); ++ ++ obj_priv->page_cpu_valid[i] = 1; ++ } ++ ++ return 0; ++} ++ ++/** ++ * Once all of the objects have been set in the proper domain, ++ * perform the necessary flush and invalidate operations. ++ * ++ * Returns the write domains flushed, for use in flush tracking. ++ */ ++static uint32_t ++i915_gem_dev_set_domain(struct drm_device *dev) ++{ ++ uint32_t flush_domains = dev->flush_domains; ++ ++ /* ++ * Now that all the buffers are synced to the proper domains, ++ * flush and invalidate the collected domains ++ */ ++ if (dev->invalidate_domains | dev->flush_domains) { ++#if WATCH_EXEC ++ DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", ++ __func__, ++ dev->invalidate_domains, ++ dev->flush_domains); ++#endif ++ i915_gem_flush(dev, ++ dev->invalidate_domains, ++ dev->flush_domains); ++ dev->invalidate_domains = 0; ++ dev->flush_domains = 0; ++ } ++ ++ return flush_domains; ++} ++ ++/** ++ * Pin an object to the GTT and evaluate the relocations landing in it. ++ */ ++static int ++i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, ++ struct drm_file *file_priv, ++ struct drm_i915_gem_exec_object *entry) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_relocation_entry reloc; ++ struct drm_i915_gem_relocation_entry __user *relocs; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int i, ret; ++ uint32_t last_reloc_offset = -1; ++ void *reloc_page = NULL; ++ ++ /* Choose the GTT offset for our buffer and put it there. */ ++ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); ++ if (ret) ++ return ret; ++ ++ entry->offset = obj_priv->gtt_offset; ++ ++ relocs = (struct drm_i915_gem_relocation_entry __user *) ++ (uintptr_t) entry->relocs_ptr; ++ /* Apply the relocations, using the GTT aperture to avoid cache ++ * flushing requirements. ++ */ ++ for (i = 0; i < entry->relocation_count; i++) { ++ struct drm_gem_object *target_obj; ++ struct drm_i915_gem_object *target_obj_priv; ++ uint32_t reloc_val, reloc_offset, *reloc_entry; ++ int ret; ++ ++ ret = copy_from_user(&reloc, relocs + i, sizeof(reloc)); ++ if (ret != 0) { ++ i915_gem_object_unpin(obj); ++ return ret; ++ } ++ ++ target_obj = drm_gem_object_lookup(obj->dev, file_priv, ++ reloc.target_handle); ++ if (target_obj == NULL) { ++ i915_gem_object_unpin(obj); ++ return -EBADF; ++ } ++ target_obj_priv = target_obj->driver_private; ++ ++ /* The target buffer should have appeared before us in the ++ * exec_object list, so it should have a GTT space bound by now. ++ */ ++ if (target_obj_priv->gtt_space == NULL) { ++ DRM_ERROR("No GTT space found for object %d\n", ++ reloc.target_handle); ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return -EINVAL; ++ } ++ ++ if (reloc.offset > obj->size - 4) { ++ DRM_ERROR("Relocation beyond object bounds: " ++ "obj %p target %d offset %d size %d.\n", ++ obj, reloc.target_handle, ++ (int) reloc.offset, (int) obj->size); ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return -EINVAL; ++ } ++ if (reloc.offset & 3) { ++ DRM_ERROR("Relocation not 4-byte aligned: " ++ "obj %p target %d offset %d.\n", ++ obj, reloc.target_handle, ++ (int) reloc.offset); ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return -EINVAL; ++ } ++ ++ if (reloc.write_domain && target_obj->pending_write_domain && ++ reloc.write_domain != target_obj->pending_write_domain) { ++ DRM_ERROR("Write domain conflict: " ++ "obj %p target %d offset %d " ++ "new %08x old %08x\n", ++ obj, reloc.target_handle, ++ (int) reloc.offset, ++ reloc.write_domain, ++ target_obj->pending_write_domain); ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return -EINVAL; ++ } ++ ++#if WATCH_RELOC ++ DRM_INFO("%s: obj %p offset %08x target %d " ++ "read %08x write %08x gtt %08x " ++ "presumed %08x delta %08x\n", ++ __func__, ++ obj, ++ (int) reloc.offset, ++ (int) reloc.target_handle, ++ (int) reloc.read_domains, ++ (int) reloc.write_domain, ++ (int) target_obj_priv->gtt_offset, ++ (int) reloc.presumed_offset, ++ reloc.delta); ++#endif ++ ++ target_obj->pending_read_domains |= reloc.read_domains; ++ target_obj->pending_write_domain |= reloc.write_domain; ++ ++ /* If the relocation already has the right value in it, no ++ * more work needs to be done. ++ */ ++ if (target_obj_priv->gtt_offset == reloc.presumed_offset) { ++ drm_gem_object_unreference(target_obj); ++ continue; ++ } ++ ++ /* Now that we're going to actually write some data in, ++ * make sure that any rendering using this buffer's contents ++ * is completed. ++ */ ++ i915_gem_object_wait_rendering(obj); ++ ++ /* As we're writing through the gtt, flush ++ * any CPU writes before we write the relocations ++ */ ++ if (obj->write_domain & I915_GEM_DOMAIN_CPU) { ++ i915_gem_clflush_object(obj); ++ drm_agp_chipset_flush(dev); ++ obj->write_domain = 0; ++ } ++ ++ /* Map the page containing the relocation we're going to ++ * perform. ++ */ ++ reloc_offset = obj_priv->gtt_offset + reloc.offset; ++ if (reloc_page == NULL || ++ (last_reloc_offset & ~(PAGE_SIZE - 1)) != ++ (reloc_offset & ~(PAGE_SIZE - 1))) { ++ if (reloc_page != NULL) ++ iounmap(reloc_page); ++ ++ reloc_page = ioremap(dev->agp->base + ++ (reloc_offset & ~(PAGE_SIZE - 1)), ++ PAGE_SIZE); ++ last_reloc_offset = reloc_offset; ++ if (reloc_page == NULL) { ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return -ENOMEM; ++ } ++ } ++ ++ reloc_entry = (uint32_t *)((char *)reloc_page + ++ (reloc_offset & (PAGE_SIZE - 1))); ++ reloc_val = target_obj_priv->gtt_offset + reloc.delta; ++ ++#if WATCH_BUF ++ DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", ++ obj, (unsigned int) reloc.offset, ++ readl(reloc_entry), reloc_val); ++#endif ++ writel(reloc_val, reloc_entry); ++ ++ /* Write the updated presumed offset for this entry back out ++ * to the user. ++ */ ++ reloc.presumed_offset = target_obj_priv->gtt_offset; ++ ret = copy_to_user(relocs + i, &reloc, sizeof(reloc)); ++ if (ret != 0) { ++ drm_gem_object_unreference(target_obj); ++ i915_gem_object_unpin(obj); ++ return ret; ++ } ++ ++ drm_gem_object_unreference(target_obj); ++ } ++ ++ if (reloc_page != NULL) ++ iounmap(reloc_page); ++ ++#if WATCH_BUF ++ if (0) ++ i915_gem_dump_object(obj, 128, __func__, ~0); ++#endif ++ return 0; ++} ++ ++/** Dispatch a batchbuffer to the ring ++ */ ++static int ++i915_dispatch_gem_execbuffer(struct drm_device *dev, ++ struct drm_i915_gem_execbuffer *exec, ++ uint64_t exec_offset) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) ++ (uintptr_t) exec->cliprects_ptr; ++ int nbox = exec->num_cliprects; ++ int i = 0, count; ++ uint32_t exec_start, exec_len; ++ RING_LOCALS; ++ ++ exec_start = (uint32_t) exec_offset + exec->batch_start_offset; ++ exec_len = (uint32_t) exec->batch_len; ++ ++ if ((exec_start | exec_len) & 0x7) { ++ DRM_ERROR("alignment\n"); ++ return -EINVAL; ++ } ++ ++ if (!exec_start) ++ return -EINVAL; ++ ++ count = nbox ? nbox : 1; ++ ++ for (i = 0; i < count; i++) { ++ if (i < nbox) { ++ int ret = i915_emit_box(dev, boxes, i, ++ exec->DR1, exec->DR4); ++ if (ret) ++ return ret; ++ } ++ ++ if (IS_I830(dev) || IS_845G(dev)) { ++ BEGIN_LP_RING(4); ++ OUT_RING(MI_BATCH_BUFFER); ++ OUT_RING(exec_start | MI_BATCH_NON_SECURE); ++ OUT_RING(exec_start + exec_len - 4); ++ OUT_RING(0); ++ ADVANCE_LP_RING(); ++ } else { ++ BEGIN_LP_RING(2); ++ if (IS_I965G(dev)) { ++ OUT_RING(MI_BATCH_BUFFER_START | ++ (2 << 6) | ++ MI_BATCH_NON_SECURE_I965); ++ OUT_RING(exec_start); ++ } else { ++ OUT_RING(MI_BATCH_BUFFER_START | ++ (2 << 6)); ++ OUT_RING(exec_start | MI_BATCH_NON_SECURE); ++ } ++ ADVANCE_LP_RING(); ++ } ++ } ++ ++ /* XXX breadcrumb */ ++ return 0; ++} ++ ++/* Throttle our rendering by waiting until the ring has completed our requests ++ * emitted over 20 msec ago. ++ * ++ * This should get us reasonable parallelism between CPU and GPU but also ++ * relatively low latency when blocking on a particular request to finish. ++ */ ++static int ++i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) ++{ ++ struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; ++ int ret = 0; ++ uint32_t seqno; ++ ++ mutex_lock(&dev->struct_mutex); ++ seqno = i915_file_priv->mm.last_gem_throttle_seqno; ++ i915_file_priv->mm.last_gem_throttle_seqno = ++ i915_file_priv->mm.last_gem_seqno; ++ if (seqno) ++ ret = i915_wait_request(dev, seqno); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++} ++ ++int ++i915_gem_execbuffer(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; ++ struct drm_i915_gem_execbuffer *args = data; ++ struct drm_i915_gem_exec_object *exec_list = NULL; ++ struct drm_gem_object **object_list = NULL; ++ struct drm_gem_object *batch_obj; ++ int ret, i, pinned = 0; ++ uint64_t exec_offset; ++ uint32_t seqno, flush_domains, pre_flush_domains; ++ ++#if WATCH_EXEC ++ DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", ++ (int) args->buffers_ptr, args->buffer_count, args->batch_len); ++#endif ++ ++ /* Copy in the exec list from userland */ ++ exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, ++ DRM_MEM_DRIVER); ++ object_list = drm_calloc(sizeof(*object_list), args->buffer_count, ++ DRM_MEM_DRIVER); ++ if (exec_list == NULL || object_list == NULL) { ++ DRM_ERROR("Failed to allocate exec or object list " ++ "for %d buffers\n", ++ args->buffer_count); ++ ret = -ENOMEM; ++ goto pre_mutex_err; ++ } ++ ret = copy_from_user(exec_list, ++ (struct drm_i915_relocation_entry __user *) ++ (uintptr_t) args->buffers_ptr, ++ sizeof(*exec_list) * args->buffer_count); ++ if (ret != 0) { ++ DRM_ERROR("copy %d exec entries failed %d\n", ++ args->buffer_count, ret); ++ goto pre_mutex_err; ++ } ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ if (dev_priv->mm.wedged) { ++ DRM_ERROR("Execbuf while wedged\n"); ++ mutex_unlock(&dev->struct_mutex); ++ return -EIO; ++ } ++ ++ if (dev_priv->mm.suspended) { ++ DRM_ERROR("Execbuf while VT-switched.\n"); ++ mutex_unlock(&dev->struct_mutex); ++ return -EBUSY; ++ } ++ ++ /* Zero the gloabl flush/invalidate flags. These ++ * will be modified as each object is bound to the ++ * gtt ++ */ ++ dev->invalidate_domains = 0; ++ dev->flush_domains = 0; ++ ++ /* Look up object handles and perform the relocations */ ++ for (i = 0; i < args->buffer_count; i++) { ++ object_list[i] = drm_gem_object_lookup(dev, file_priv, ++ exec_list[i].handle); ++ if (object_list[i] == NULL) { ++ DRM_ERROR("Invalid object handle %d at index %d\n", ++ exec_list[i].handle, i); ++ ret = -EBADF; ++ goto err; ++ } ++ ++ object_list[i]->pending_read_domains = 0; ++ object_list[i]->pending_write_domain = 0; ++ ret = i915_gem_object_pin_and_relocate(object_list[i], ++ file_priv, ++ &exec_list[i]); ++ if (ret) { ++ DRM_ERROR("object bind and relocate failed %d\n", ret); ++ goto err; ++ } ++ pinned = i + 1; ++ } ++ ++ /* Set the pending read domains for the batch buffer to COMMAND */ ++ batch_obj = object_list[args->buffer_count-1]; ++ batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; ++ batch_obj->pending_write_domain = 0; ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ for (i = 0; i < args->buffer_count; i++) { ++ struct drm_gem_object *obj = object_list[i]; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ if (obj_priv->gtt_space == NULL) { ++ /* We evicted the buffer in the process of validating ++ * our set of buffers in. We could try to recover by ++ * kicking them everything out and trying again from ++ * the start. ++ */ ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ /* make sure all previous memory operations have passed */ ++ ret = i915_gem_object_set_domain(obj, ++ obj->pending_read_domains, ++ obj->pending_write_domain); ++ if (ret) ++ goto err; ++ } ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ /* Flush/invalidate caches and chipset buffer */ ++ flush_domains = i915_gem_dev_set_domain(dev); ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++#if WATCH_COHERENCY ++ for (i = 0; i < args->buffer_count; i++) { ++ i915_gem_object_check_coherency(object_list[i], ++ exec_list[i].handle); ++ } ++#endif ++ ++ exec_offset = exec_list[args->buffer_count - 1].offset; ++ ++#if WATCH_EXEC ++ i915_gem_dump_object(object_list[args->buffer_count - 1], ++ args->batch_len, ++ __func__, ++ ~0); ++#endif ++ ++ pre_flush_domains = flush_domains; ++ ++ /* Exec the batchbuffer */ ++ ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset); ++ if (ret) { ++ DRM_ERROR("dispatch failed %d\n", ret); ++ goto err; ++ } ++ ++ /* ++ * Ensure that the commands in the batch buffer are ++ * finished before the interrupt fires ++ */ ++ flush_domains |= i915_retire_commands(dev); ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ /* ++ * Get a seqno representing the execution of the current buffer, ++ * which we can wait on. We would like to mitigate these interrupts, ++ * likely by only creating seqnos occasionally (so that we have ++ * *some* interrupts representing completion of buffers that we can ++ * wait on when trying to clear up gtt space). ++ */ ++ seqno = i915_add_request(dev, flush_domains); ++ BUG_ON(seqno == 0); ++ i915_file_priv->mm.last_gem_seqno = seqno; ++ for (i = 0; i < args->buffer_count; i++) { ++ struct drm_gem_object *obj = object_list[i]; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ if (pre_flush_domains & obj->pending_write_domain & ++ ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) { ++ /* If we had a batchbuffer that resulted in a GPU ++ * domain being flushed before execution, followed by ++ * execution that resulted in the write_domain being ++ * set, then when that request is retired the ++ * write_domain would be incorrectly cleared. We're not ++ * sure that this can be triggered. ++ */ ++ DRM_ERROR("Going to lose the write domain on " ++ "obj %d size %d\n", ++ exec_list[i].handle, obj->size); ++ } ++ ++ i915_gem_object_move_to_active(obj); ++ obj_priv->last_rendering_seqno = seqno; ++#if WATCH_LRU ++ DRM_INFO("%s: move to exec list %p\n", __func__, obj); ++#endif ++ } ++#if WATCH_LRU ++ i915_dump_lru(dev, __func__); ++#endif ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ /* Copy the new buffer offsets back to the user's exec list. */ ++ ret = copy_to_user((struct drm_i915_relocation_entry __user *) ++ (uintptr_t) args->buffers_ptr, ++ exec_list, ++ sizeof(*exec_list) * args->buffer_count); ++ if (ret) ++ DRM_ERROR("failed to copy %d exec entries " ++ "back to user (%d)\n", ++ args->buffer_count, ret); ++err: ++ if (object_list != NULL) { ++ for (i = 0; i < pinned; i++) ++ i915_gem_object_unpin(object_list[i]); ++ ++ for (i = 0; i < args->buffer_count; i++) ++ drm_gem_object_unreference(object_list[i]); ++ } ++ mutex_unlock(&dev->struct_mutex); ++ ++pre_mutex_err: ++ drm_free(object_list, sizeof(*object_list) * args->buffer_count, ++ DRM_MEM_DRIVER); ++ drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, ++ DRM_MEM_DRIVER); ++ ++ return ret; ++} ++ ++int ++i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int ret; ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ if (obj_priv->gtt_space == NULL) { ++ ret = i915_gem_object_bind_to_gtt(obj, alignment); ++ if (ret != 0) { ++ DRM_ERROR("Failure to bind: %d", ret); ++ return ret; ++ } ++ } ++ obj_priv->pin_count++; ++ ++ /* If the object is not active and not pending a flush, ++ * remove it from the inactive list ++ */ ++ if (obj_priv->pin_count == 1) { ++ atomic_inc(&dev->pin_count); ++ atomic_add(obj->size, &dev->pin_memory); ++ if (!obj_priv->active && ++ (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT)) == 0 && ++ !list_empty(&obj_priv->list)) ++ list_del_init(&obj_priv->list); ++ } ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ ++ return 0; ++} ++ ++void ++i915_gem_object_unpin(struct drm_gem_object *obj) ++{ ++ struct drm_device *dev = obj->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++ obj_priv->pin_count--; ++ BUG_ON(obj_priv->pin_count < 0); ++ BUG_ON(obj_priv->gtt_space == NULL); ++ ++ /* If the object is no longer pinned, and is ++ * neither active nor being flushed, then stick it on ++ * the inactive list ++ */ ++ if (obj_priv->pin_count == 0) { ++ if (!obj_priv->active && ++ (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT)) == 0) ++ list_move_tail(&obj_priv->list, ++ &dev_priv->mm.inactive_list); ++ atomic_dec(&dev->pin_count); ++ atomic_sub(obj->size, &dev->pin_memory); ++ } ++ i915_verify_inactive(dev, __FILE__, __LINE__); ++} ++ ++int ++i915_gem_pin_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_pin *args = data; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) { ++ DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", ++ args->handle); ++ mutex_unlock(&dev->struct_mutex); ++ return -EBADF; ++ } ++ obj_priv = obj->driver_private; ++ ++ ret = i915_gem_object_pin(obj, args->alignment); ++ if (ret != 0) { ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++ } ++ ++ /* XXX - flush the CPU caches for pinned objects ++ * as the X server doesn't manage domains yet ++ */ ++ if (obj->write_domain & I915_GEM_DOMAIN_CPU) { ++ i915_gem_clflush_object(obj); ++ drm_agp_chipset_flush(dev); ++ obj->write_domain = 0; ++ } ++ args->offset = obj_priv->gtt_offset; ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++int ++i915_gem_unpin_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_pin *args = data; ++ struct drm_gem_object *obj; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) { ++ DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", ++ args->handle); ++ mutex_unlock(&dev->struct_mutex); ++ return -EBADF; ++ } ++ ++ i915_gem_object_unpin(obj); ++ ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return 0; ++} ++ ++int ++i915_gem_busy_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_busy *args = data; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ++ mutex_lock(&dev->struct_mutex); ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) { ++ DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", ++ args->handle); ++ mutex_unlock(&dev->struct_mutex); ++ return -EBADF; ++ } ++ ++ obj_priv = obj->driver_private; ++ args->busy = obj_priv->active; ++ ++ drm_gem_object_unreference(obj); ++ mutex_unlock(&dev->struct_mutex); ++ return 0; ++} ++ ++int ++i915_gem_throttle_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ return i915_gem_ring_throttle(dev, file_priv); ++} ++ ++int i915_gem_init_object(struct drm_gem_object *obj) ++{ ++ struct drm_i915_gem_object *obj_priv; ++ ++ obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); ++ if (obj_priv == NULL) ++ return -ENOMEM; ++ ++ /* ++ * We've just allocated pages from the kernel, ++ * so they've just been written by the CPU with ++ * zeros. They'll need to be clflushed before we ++ * use them with the GPU. ++ */ ++ obj->write_domain = I915_GEM_DOMAIN_CPU; ++ obj->read_domains = I915_GEM_DOMAIN_CPU; ++ ++ obj->driver_private = obj_priv; ++ obj_priv->obj = obj; ++ INIT_LIST_HEAD(&obj_priv->list); ++ return 0; ++} ++ ++void i915_gem_free_object(struct drm_gem_object *obj) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ ++ while (obj_priv->pin_count > 0) ++ i915_gem_object_unpin(obj); ++ ++ i915_gem_object_unbind(obj); ++ ++ drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); ++ drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); ++} ++ ++static int ++i915_gem_set_domain(struct drm_gem_object *obj, ++ struct drm_file *file_priv, ++ uint32_t read_domains, ++ uint32_t write_domain) ++{ ++ struct drm_device *dev = obj->dev; ++ int ret; ++ uint32_t flush_domains; ++ ++ BUG_ON(!mutex_is_locked(&dev->struct_mutex)); ++ ++ ret = i915_gem_object_set_domain(obj, read_domains, write_domain); ++ if (ret) ++ return ret; ++ flush_domains = i915_gem_dev_set_domain(obj->dev); ++ ++ if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) ++ (void) i915_add_request(dev, flush_domains); ++ ++ return 0; ++} ++ ++/** Unbinds all objects that are on the given buffer list. */ ++static int ++i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) ++{ ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret; ++ ++ while (!list_empty(head)) { ++ obj_priv = list_first_entry(head, ++ struct drm_i915_gem_object, ++ list); ++ obj = obj_priv->obj; ++ ++ if (obj_priv->pin_count != 0) { ++ DRM_ERROR("Pinned object in unbind list\n"); ++ mutex_unlock(&dev->struct_mutex); ++ return -EINVAL; ++ } ++ ++ ret = i915_gem_object_unbind(obj); ++ if (ret != 0) { ++ DRM_ERROR("Error unbinding object in LeaveVT: %d\n", ++ ret); ++ mutex_unlock(&dev->struct_mutex); ++ return ret; ++ } ++ } ++ ++ ++ return 0; ++} ++ ++static int ++i915_gem_idle(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ uint32_t seqno, cur_seqno, last_seqno; ++ int stuck; ++ ++ if (dev_priv->mm.suspended) ++ return 0; ++ ++ /* Hack! Don't let anybody do execbuf while we don't control the chip. ++ * We need to replace this with a semaphore, or something. ++ */ ++ dev_priv->mm.suspended = 1; ++ ++ i915_kernel_lost_context(dev); ++ ++ /* Flush the GPU along with all non-CPU write domains ++ */ ++ i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), ++ ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); ++ seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT)); ++ ++ if (seqno == 0) { ++ mutex_unlock(&dev->struct_mutex); ++ return -ENOMEM; ++ } ++ ++ dev_priv->mm.waiting_gem_seqno = seqno; ++ last_seqno = 0; ++ stuck = 0; ++ for (;;) { ++ cur_seqno = i915_get_gem_seqno(dev); ++ if (i915_seqno_passed(cur_seqno, seqno)) ++ break; ++ if (last_seqno == cur_seqno) { ++ if (stuck++ > 100) { ++ DRM_ERROR("hardware wedged\n"); ++ dev_priv->mm.wedged = 1; ++ DRM_WAKEUP(&dev_priv->irq_queue); ++ break; ++ } ++ } ++ msleep(10); ++ last_seqno = cur_seqno; ++ } ++ dev_priv->mm.waiting_gem_seqno = 0; ++ ++ i915_gem_retire_requests(dev); ++ ++ /* Active and flushing should now be empty as we've ++ * waited for a sequence higher than any pending execbuffer ++ */ ++ BUG_ON(!list_empty(&dev_priv->mm.active_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); ++ ++ /* Request should now be empty as we've also waited ++ * for the last request in the list ++ */ ++ BUG_ON(!list_empty(&dev_priv->mm.request_list)); ++ ++ /* Move all buffers out of the GTT. */ ++ i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); ++ ++ BUG_ON(!list_empty(&dev_priv->mm.active_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.request_list)); ++ return 0; ++} ++ ++static int ++i915_gem_init_hws(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret; ++ ++ /* If we need a physical address for the status page, it's already ++ * initialized at driver load time. ++ */ ++ if (!I915_NEED_GFX_HWS(dev)) ++ return 0; ++ ++ obj = drm_gem_object_alloc(dev, 4096); ++ if (obj == NULL) { ++ DRM_ERROR("Failed to allocate status page\n"); ++ return -ENOMEM; ++ } ++ obj_priv = obj->driver_private; ++ ++ ret = i915_gem_object_pin(obj, 4096); ++ if (ret != 0) { ++ drm_gem_object_unreference(obj); ++ return ret; ++ } ++ ++ dev_priv->status_gfx_addr = obj_priv->gtt_offset; ++ dev_priv->hws_map.offset = dev->agp->base + obj_priv->gtt_offset; ++ dev_priv->hws_map.size = 4096; ++ dev_priv->hws_map.type = 0; ++ dev_priv->hws_map.flags = 0; ++ dev_priv->hws_map.mtrr = 0; ++ ++ drm_core_ioremap(&dev_priv->hws_map, dev); ++ if (dev_priv->hws_map.handle == NULL) { ++ DRM_ERROR("Failed to map status page.\n"); ++ memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); ++ drm_gem_object_unreference(obj); ++ return -EINVAL; ++ } ++ dev_priv->hws_obj = obj; ++ dev_priv->hw_status_page = dev_priv->hws_map.handle; ++ memset(dev_priv->hw_status_page, 0, PAGE_SIZE); ++ I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); ++ DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); ++ ++ return 0; ++} ++ ++static int ++i915_gem_init_ringbuffer(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ int ret; ++ ++ ret = i915_gem_init_hws(dev); ++ if (ret != 0) ++ return ret; ++ ++ obj = drm_gem_object_alloc(dev, 128 * 1024); ++ if (obj == NULL) { ++ DRM_ERROR("Failed to allocate ringbuffer\n"); ++ return -ENOMEM; ++ } ++ obj_priv = obj->driver_private; ++ ++ ret = i915_gem_object_pin(obj, 4096); ++ if (ret != 0) { ++ drm_gem_object_unreference(obj); ++ return ret; ++ } ++ ++ /* Set up the kernel mapping for the ring. */ ++ dev_priv->ring.Size = obj->size; ++ dev_priv->ring.tail_mask = obj->size - 1; ++ ++ dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset; ++ dev_priv->ring.map.size = obj->size; ++ dev_priv->ring.map.type = 0; ++ dev_priv->ring.map.flags = 0; ++ dev_priv->ring.map.mtrr = 0; ++ ++ drm_core_ioremap(&dev_priv->ring.map, dev); ++ if (dev_priv->ring.map.handle == NULL) { ++ DRM_ERROR("Failed to map ringbuffer.\n"); ++ memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); ++ drm_gem_object_unreference(obj); ++ return -EINVAL; ++ } ++ dev_priv->ring.ring_obj = obj; ++ dev_priv->ring.virtual_start = dev_priv->ring.map.handle; ++ ++ /* Stop the ring if it's running. */ ++ I915_WRITE(PRB0_CTL, 0); ++ I915_WRITE(PRB0_HEAD, 0); ++ I915_WRITE(PRB0_TAIL, 0); ++ I915_WRITE(PRB0_START, 0); ++ ++ /* Initialize the ring. */ ++ I915_WRITE(PRB0_START, obj_priv->gtt_offset); ++ I915_WRITE(PRB0_CTL, ++ ((obj->size - 4096) & RING_NR_PAGES) | ++ RING_NO_REPORT | ++ RING_VALID); ++ ++ /* Update our cache of the ring state */ ++ i915_kernel_lost_context(dev); ++ ++ return 0; ++} ++ ++static void ++i915_gem_cleanup_ringbuffer(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ ++ if (dev_priv->ring.ring_obj == NULL) ++ return; ++ ++ drm_core_ioremapfree(&dev_priv->ring.map, dev); ++ ++ i915_gem_object_unpin(dev_priv->ring.ring_obj); ++ drm_gem_object_unreference(dev_priv->ring.ring_obj); ++ dev_priv->ring.ring_obj = NULL; ++ memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); ++ ++ if (dev_priv->hws_obj != NULL) { ++ i915_gem_object_unpin(dev_priv->hws_obj); ++ drm_gem_object_unreference(dev_priv->hws_obj); ++ dev_priv->hws_obj = NULL; ++ memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); ++ ++ /* Write high address into HWS_PGA when disabling. */ ++ I915_WRITE(HWS_PGA, 0x1ffff000); ++ } ++} ++ ++int ++i915_gem_entervt_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ int ret; ++ ++ if (dev_priv->mm.wedged) { ++ DRM_ERROR("Reenabling wedged hardware, good luck\n"); ++ dev_priv->mm.wedged = 0; ++ } ++ ++ ret = i915_gem_init_ringbuffer(dev); ++ if (ret != 0) ++ return ret; ++ ++ mutex_lock(&dev->struct_mutex); ++ BUG_ON(!list_empty(&dev_priv->mm.active_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); ++ BUG_ON(!list_empty(&dev_priv->mm.request_list)); ++ dev_priv->mm.suspended = 0; ++ mutex_unlock(&dev->struct_mutex); ++ return 0; ++} ++ ++int ++i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ int ret; ++ ++ mutex_lock(&dev->struct_mutex); ++ ret = i915_gem_idle(dev); ++ if (ret == 0) ++ i915_gem_cleanup_ringbuffer(dev); ++ mutex_unlock(&dev->struct_mutex); ++ ++ return 0; ++} ++ ++void ++i915_gem_lastclose(struct drm_device *dev) ++{ ++ int ret; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ if (dev_priv->ring.ring_obj != NULL) { ++ ret = i915_gem_idle(dev); ++ if (ret) ++ DRM_ERROR("failed to idle hardware: %d\n", ret); ++ ++ i915_gem_cleanup_ringbuffer(dev); ++ } ++ ++ mutex_unlock(&dev->struct_mutex); ++} ++ ++void ++i915_gem_load(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ ++ INIT_LIST_HEAD(&dev_priv->mm.active_list); ++ INIT_LIST_HEAD(&dev_priv->mm.flushing_list); ++ INIT_LIST_HEAD(&dev_priv->mm.inactive_list); ++ INIT_LIST_HEAD(&dev_priv->mm.request_list); ++ INIT_DELAYED_WORK(&dev_priv->mm.retire_work, ++ i915_gem_retire_work_handler); ++ dev_priv->mm.next_gem_seqno = 1; ++ ++ i915_gem_detect_bit_6_swizzle(dev); ++} +--- /dev/null ++++ b/drivers/gpu/drm/i915/i915_gem_debug.c +@@ -0,0 +1,201 @@ ++/* ++ * Copyright © 2008 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Keith Packard <keithp@keithp.com> ++ * ++ */ ++ ++#include "drmP.h" ++#include "drm.h" ++#include "i915_drm.h" ++#include "i915_drv.h" ++ ++#if WATCH_INACTIVE ++void ++i915_verify_inactive(struct drm_device *dev, char *file, int line) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ++ list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { ++ obj = obj_priv->obj; ++ if (obj_priv->pin_count || obj_priv->active || ++ (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | ++ I915_GEM_DOMAIN_GTT))) ++ DRM_ERROR("inactive %p (p %d a %d w %x) %s:%d\n", ++ obj, ++ obj_priv->pin_count, obj_priv->active, ++ obj->write_domain, file, line); ++ } ++} ++#endif /* WATCH_INACTIVE */ ++ ++ ++#if WATCH_BUF | WATCH_EXEC | WATCH_PWRITE ++static void ++i915_gem_dump_page(struct page *page, uint32_t start, uint32_t end, ++ uint32_t bias, uint32_t mark) ++{ ++ uint32_t *mem = kmap_atomic(page, KM_USER0); ++ int i; ++ for (i = start; i < end; i += 4) ++ DRM_INFO("%08x: %08x%s\n", ++ (int) (bias + i), mem[i / 4], ++ (bias + i == mark) ? " ********" : ""); ++ kunmap_atomic(mem, KM_USER0); ++ /* give syslog time to catch up */ ++ msleep(1); ++} ++ ++void ++i915_gem_dump_object(struct drm_gem_object *obj, int len, ++ const char *where, uint32_t mark) ++{ ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int page; ++ ++ DRM_INFO("%s: object at offset %08x\n", where, obj_priv->gtt_offset); ++ for (page = 0; page < (len + PAGE_SIZE-1) / PAGE_SIZE; page++) { ++ int page_len, chunk, chunk_len; ++ ++ page_len = len - page * PAGE_SIZE; ++ if (page_len > PAGE_SIZE) ++ page_len = PAGE_SIZE; ++ ++ for (chunk = 0; chunk < page_len; chunk += 128) { ++ chunk_len = page_len - chunk; ++ if (chunk_len > 128) ++ chunk_len = 128; ++ i915_gem_dump_page(obj_priv->page_list[page], ++ chunk, chunk + chunk_len, ++ obj_priv->gtt_offset + ++ page * PAGE_SIZE, ++ mark); ++ } ++ } ++} ++#endif ++ ++#if WATCH_LRU ++void ++i915_dump_lru(struct drm_device *dev, const char *where) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv; ++ ++ DRM_INFO("active list %s {\n", where); ++ list_for_each_entry(obj_priv, &dev_priv->mm.active_list, ++ list) ++ { ++ DRM_INFO(" %p: %08x\n", obj_priv, ++ obj_priv->last_rendering_seqno); ++ } ++ DRM_INFO("}\n"); ++ DRM_INFO("flushing list %s {\n", where); ++ list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, ++ list) ++ { ++ DRM_INFO(" %p: %08x\n", obj_priv, ++ obj_priv->last_rendering_seqno); ++ } ++ DRM_INFO("}\n"); ++ DRM_INFO("inactive %s {\n", where); ++ list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { ++ DRM_INFO(" %p: %08x\n", obj_priv, ++ obj_priv->last_rendering_seqno); ++ } ++ DRM_INFO("}\n"); ++} ++#endif ++ ++ ++#if WATCH_COHERENCY ++void ++i915_gem_object_check_coherency(struct drm_gem_object *obj, int handle) ++{ ++ struct drm_device *dev = obj->dev; ++ struct drm_i915_gem_object *obj_priv = obj->driver_private; ++ int page; ++ uint32_t *gtt_mapping; ++ uint32_t *backing_map = NULL; ++ int bad_count = 0; ++ ++ DRM_INFO("%s: checking coherency of object %p@0x%08x (%d, %dkb):\n", ++ __func__, obj, obj_priv->gtt_offset, handle, ++ obj->size / 1024); ++ ++ gtt_mapping = ioremap(dev->agp->base + obj_priv->gtt_offset, ++ obj->size); ++ if (gtt_mapping == NULL) { ++ DRM_ERROR("failed to map GTT space\n"); ++ return; ++ } ++ ++ for (page = 0; page < obj->size / PAGE_SIZE; page++) { ++ int i; ++ ++ backing_map = kmap_atomic(obj_priv->page_list[page], KM_USER0); ++ ++ if (backing_map == NULL) { ++ DRM_ERROR("failed to map backing page\n"); ++ goto out; ++ } ++ ++ for (i = 0; i < PAGE_SIZE / 4; i++) { ++ uint32_t cpuval = backing_map[i]; ++ uint32_t gttval = readl(gtt_mapping + ++ page * 1024 + i); ++ ++ if (cpuval != gttval) { ++ DRM_INFO("incoherent CPU vs GPU at 0x%08x: " ++ "0x%08x vs 0x%08x\n", ++ (int)(obj_priv->gtt_offset + ++ page * PAGE_SIZE + i * 4), ++ cpuval, gttval); ++ if (bad_count++ >= 8) { ++ DRM_INFO("...\n"); ++ goto out; ++ } ++ } ++ } ++ kunmap_atomic(backing_map, KM_USER0); ++ backing_map = NULL; ++ } ++ ++ out: ++ if (backing_map != NULL) ++ kunmap_atomic(backing_map, KM_USER0); ++ iounmap(gtt_mapping); ++ ++ /* give syslog time to catch up */ ++ msleep(1); ++ ++ /* Directly flush the object, since we just loaded values with the CPU ++ * from the backing pages and we don't want to disturb the cache ++ * management that we're trying to observe. ++ */ ++ ++ i915_gem_clflush_object(obj); ++} ++#endif +--- /dev/null ++++ b/drivers/gpu/drm/i915/i915_gem_proc.c +@@ -0,0 +1,292 @@ ++/* ++ * Copyright © 2008 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Eric Anholt <eric@anholt.net> ++ * Keith Packard <keithp@keithp.com> ++ * ++ */ ++ ++#include "drmP.h" ++#include "drm.h" ++#include "i915_drm.h" ++#include "i915_drv.h" ++ ++static int i915_gem_active_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Active:\n"); ++ list_for_each_entry(obj_priv, &dev_priv->mm.active_list, ++ list) ++ { ++ struct drm_gem_object *obj = obj_priv->obj; ++ if (obj->name) { ++ DRM_PROC_PRINT(" %p(%d): %08x %08x %d\n", ++ obj, obj->name, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } else { ++ DRM_PROC_PRINT(" %p: %08x %08x %d\n", ++ obj, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } ++ } ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++static int i915_gem_flushing_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Flushing:\n"); ++ list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, ++ list) ++ { ++ struct drm_gem_object *obj = obj_priv->obj; ++ if (obj->name) { ++ DRM_PROC_PRINT(" %p(%d): %08x %08x %d\n", ++ obj, obj->name, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } else { ++ DRM_PROC_PRINT(" %p: %08x %08x %d\n", obj, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } ++ } ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++static int i915_gem_inactive_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_object *obj_priv; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Inactive:\n"); ++ list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, ++ list) ++ { ++ struct drm_gem_object *obj = obj_priv->obj; ++ if (obj->name) { ++ DRM_PROC_PRINT(" %p(%d): %08x %08x %d\n", ++ obj, obj->name, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } else { ++ DRM_PROC_PRINT(" %p: %08x %08x %d\n", obj, ++ obj->read_domains, obj->write_domain, ++ obj_priv->last_rendering_seqno); ++ } ++ } ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++static int i915_gem_request_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_i915_gem_request *gem_request; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Request:\n"); ++ list_for_each_entry(gem_request, &dev_priv->mm.request_list, ++ list) ++ { ++ DRM_PROC_PRINT(" %d @ %d %08x\n", ++ gem_request->seqno, ++ (int) (jiffies - gem_request->emitted_jiffies), ++ gem_request->flush_domains); ++ } ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++static int i915_gem_seqno_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Current sequence: %d\n", i915_get_gem_seqno(dev)); ++ DRM_PROC_PRINT("Waiter sequence: %d\n", ++ dev_priv->mm.waiting_gem_seqno); ++ DRM_PROC_PRINT("IRQ sequence: %d\n", dev_priv->mm.irq_gem_seqno); ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++ ++static int i915_interrupt_info(char *buf, char **start, off_t offset, ++ int request, int *eof, void *data) ++{ ++ struct drm_minor *minor = (struct drm_minor *) data; ++ struct drm_device *dev = minor->dev; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ int len = 0; ++ ++ if (offset > DRM_PROC_LIMIT) { ++ *eof = 1; ++ return 0; ++ } ++ ++ *start = &buf[offset]; ++ *eof = 0; ++ DRM_PROC_PRINT("Interrupt enable: %08x\n", ++ I915_READ(IER)); ++ DRM_PROC_PRINT("Interrupt identity: %08x\n", ++ I915_READ(IIR)); ++ DRM_PROC_PRINT("Interrupt mask: %08x\n", ++ I915_READ(IMR)); ++ DRM_PROC_PRINT("Pipe A stat: %08x\n", ++ I915_READ(PIPEASTAT)); ++ DRM_PROC_PRINT("Pipe B stat: %08x\n", ++ I915_READ(PIPEBSTAT)); ++ DRM_PROC_PRINT("Interrupts received: %d\n", ++ atomic_read(&dev_priv->irq_received)); ++ DRM_PROC_PRINT("Current sequence: %d\n", ++ i915_get_gem_seqno(dev)); ++ DRM_PROC_PRINT("Waiter sequence: %d\n", ++ dev_priv->mm.waiting_gem_seqno); ++ DRM_PROC_PRINT("IRQ sequence: %d\n", ++ dev_priv->mm.irq_gem_seqno); ++ if (len > request + offset) ++ return request; ++ *eof = 1; ++ return len - offset; ++} ++ ++static struct drm_proc_list { ++ /** file name */ ++ const char *name; ++ /** proc callback*/ ++ int (*f) (char *, char **, off_t, int, int *, void *); ++} i915_gem_proc_list[] = { ++ {"i915_gem_active", i915_gem_active_info}, ++ {"i915_gem_flushing", i915_gem_flushing_info}, ++ {"i915_gem_inactive", i915_gem_inactive_info}, ++ {"i915_gem_request", i915_gem_request_info}, ++ {"i915_gem_seqno", i915_gem_seqno_info}, ++ {"i915_gem_interrupt", i915_interrupt_info}, ++}; ++ ++#define I915_GEM_PROC_ENTRIES ARRAY_SIZE(i915_gem_proc_list) ++ ++int i915_gem_proc_init(struct drm_minor *minor) ++{ ++ struct proc_dir_entry *ent; ++ int i, j; ++ ++ for (i = 0; i < I915_GEM_PROC_ENTRIES; i++) { ++ ent = create_proc_entry(i915_gem_proc_list[i].name, ++ S_IFREG | S_IRUGO, minor->dev_root); ++ if (!ent) { ++ DRM_ERROR("Cannot create /proc/dri/.../%s\n", ++ i915_gem_proc_list[i].name); ++ for (j = 0; j < i; j++) ++ remove_proc_entry(i915_gem_proc_list[i].name, ++ minor->dev_root); ++ return -1; ++ } ++ ent->read_proc = i915_gem_proc_list[i].f; ++ ent->data = minor; ++ } ++ return 0; ++} ++ ++void i915_gem_proc_cleanup(struct drm_minor *minor) ++{ ++ int i; ++ ++ if (!minor->dev_root) ++ return; ++ ++ for (i = 0; i < I915_GEM_PROC_ENTRIES; i++) ++ remove_proc_entry(i915_gem_proc_list[i].name, minor->dev_root); ++} +--- /dev/null ++++ b/drivers/gpu/drm/i915/i915_gem_tiling.c +@@ -0,0 +1,271 @@ ++/* ++ * Copyright © 2008 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ * ++ * Authors: ++ * Eric Anholt <eric@anholt.net> ++ * ++ */ ++ ++#include "drmP.h" ++#include "drm.h" ++#include "i915_drm.h" ++#include "i915_drv.h" ++ ++/** @file i915_gem_tiling.c ++ * ++ * Support for managing tiling state of buffer objects. ++ * ++ * The idea behind tiling is to increase cache hit rates by rearranging ++ * pixel data so that a group of pixel accesses are in the same cacheline. ++ * Performance improvement from doing this on the back/depth buffer are on ++ * the order of 30%. ++ * ++ * Intel architectures make this somewhat more complicated, though, by ++ * adjustments made to addressing of data when the memory is in interleaved ++ * mode (matched pairs of DIMMS) to improve memory bandwidth. ++ * For interleaved memory, the CPU sends every sequential 64 bytes ++ * to an alternate memory channel so it can get the bandwidth from both. ++ * ++ * The GPU also rearranges its accesses for increased bandwidth to interleaved ++ * memory, and it matches what the CPU does for non-tiled. However, when tiled ++ * it does it a little differently, since one walks addresses not just in the ++ * X direction but also Y. So, along with alternating channels when bit ++ * 6 of the address flips, it also alternates when other bits flip -- Bits 9 ++ * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) ++ * are common to both the 915 and 965-class hardware. ++ * ++ * The CPU also sometimes XORs in higher bits as well, to improve ++ * bandwidth doing strided access like we do so frequently in graphics. This ++ * is called "Channel XOR Randomization" in the MCH documentation. The result ++ * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address ++ * decode. ++ * ++ * All of this bit 6 XORing has an effect on our memory management, ++ * as we need to make sure that the 3d driver can correctly address object ++ * contents. ++ * ++ * If we don't have interleaved memory, all tiling is safe and no swizzling is ++ * required. ++ * ++ * When bit 17 is XORed in, we simply refuse to tile at all. Bit ++ * 17 is not just a page offset, so as we page an objet out and back in, ++ * individual pages in it will have different bit 17 addresses, resulting in ++ * each 64 bytes being swapped with its neighbor! ++ * ++ * Otherwise, if interleaved, we have to tell the 3d driver what the address ++ * swizzling it needs to do is, since it's writing with the CPU to the pages ++ * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the ++ * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling ++ * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order ++ * to match what the GPU expects. ++ */ ++ ++/** ++ * Detects bit 6 swizzling of address lookup between IGD access and CPU ++ * access through main memory. ++ */ ++void ++i915_gem_detect_bit_6_swizzle(struct drm_device *dev) ++{ ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct pci_dev *bridge; ++ uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; ++ uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; ++ int ret; ++ ++ if (IS_I965G(dev) && !IS_I965GM(dev)) { ++ uint32_t chdecmisc; ++ ++ /* On the 965, channel interleave appears to be determined by ++ * the flex bit. If flex is set, then the ranks (sides of a ++ * DIMM) of memory will be "stacked" (physical addresses walk ++ * through one rank then move on to the next, flipping channels ++ * or not depending on rank configuration). The GPU in this ++ * case does exactly the same addressing as the CPU. ++ * ++ * Unlike the 945, channel randomization based does not ++ * appear to be available. ++ * ++ * XXX: While the G965 doesn't appear to do any interleaving ++ * when the DIMMs are not exactly matched, the G4x chipsets ++ * might be for "L-shaped" configurations, and will need to be ++ * detected. ++ * ++ * L-shaped configuration: ++ * ++ * +-----+ ++ * | | ++ * |DIMM2| <-- non-interleaved ++ * +-----+ ++ * +-----+ +-----+ ++ * | | | | ++ * |DIMM0| |DIMM1| <-- interleaved area ++ * +-----+ +-----+ ++ */ ++ chdecmisc = I915_READ(CHDECMISC); ++ ++ if (chdecmisc == 0xff) { ++ DRM_ERROR("Couldn't read from MCHBAR. " ++ "Disabling tiling.\n"); ++ } else if (chdecmisc & CHDECMISC_FLEXMEMORY) { ++ swizzle_x = I915_BIT_6_SWIZZLE_NONE; ++ swizzle_y = I915_BIT_6_SWIZZLE_NONE; ++ } else { ++ swizzle_x = I915_BIT_6_SWIZZLE_9_10; ++ swizzle_y = I915_BIT_6_SWIZZLE_9; ++ } ++ } else if (IS_I9XX(dev)) { ++ uint32_t dcc; ++ ++ /* On 915-945 and GM965, channel interleave by the CPU is ++ * determined by DCC. The CPU will alternate based on bit 6 ++ * in interleaved mode, and the GPU will then also alternate ++ * on bit 6, 9, and 10 for X, but the CPU may also optionally ++ * alternate based on bit 17 (XOR not disabled and XOR ++ * bit == 17). ++ */ ++ dcc = I915_READ(DCC); ++ switch (dcc & DCC_ADDRESSING_MODE_MASK) { ++ case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: ++ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: ++ swizzle_x = I915_BIT_6_SWIZZLE_NONE; ++ swizzle_y = I915_BIT_6_SWIZZLE_NONE; ++ break; ++ case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: ++ if (IS_I915G(dev) || IS_I915GM(dev) || ++ dcc & DCC_CHANNEL_XOR_DISABLE) { ++ swizzle_x = I915_BIT_6_SWIZZLE_9_10; ++ swizzle_y = I915_BIT_6_SWIZZLE_9; ++ } else if (IS_I965GM(dev)) { ++ /* GM965 only does bit 11-based channel ++ * randomization ++ */ ++ swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; ++ swizzle_y = I915_BIT_6_SWIZZLE_9_11; ++ } else { ++ /* Bit 17 or perhaps other swizzling */ ++ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; ++ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; ++ } ++ break; ++ } ++ if (dcc == 0xffffffff) { ++ DRM_ERROR("Couldn't read from MCHBAR. " ++ "Disabling tiling.\n"); ++ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; ++ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; ++ } ++ } else { ++ /* As far as we know, the 865 doesn't have these bit 6 ++ * swizzling issues. ++ */ ++ swizzle_x = I915_BIT_6_SWIZZLE_NONE; ++ swizzle_y = I915_BIT_6_SWIZZLE_NONE; ++ } ++ ++ dev_priv->mm.bit_6_swizzle_x = swizzle_x; ++ dev_priv->mm.bit_6_swizzle_y = swizzle_y; ++} ++ ++/** ++ * Sets the tiling mode of an object, returning the required swizzling of ++ * bit 6 of addresses in the object. ++ */ ++int ++i915_gem_set_tiling(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_set_tiling *args = data; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EINVAL; ++ obj_priv = obj->driver_private; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ if (args->tiling_mode == I915_TILING_NONE) { ++ obj_priv->tiling_mode = I915_TILING_NONE; ++ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; ++ } else { ++ if (args->tiling_mode == I915_TILING_X) ++ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; ++ else ++ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; ++ /* If we can't handle the swizzling, make it untiled. */ ++ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { ++ args->tiling_mode = I915_TILING_NONE; ++ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; ++ } ++ } ++ obj_priv->tiling_mode = args->tiling_mode; ++ ++ mutex_unlock(&dev->struct_mutex); ++ ++ drm_gem_object_unreference(obj); ++ ++ return 0; ++} ++ ++/** ++ * Returns the current tiling mode and required bit 6 swizzling for the object. ++ */ ++int ++i915_gem_get_tiling(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_i915_gem_get_tiling *args = data; ++ drm_i915_private_t *dev_priv = dev->dev_private; ++ struct drm_gem_object *obj; ++ struct drm_i915_gem_object *obj_priv; ++ ++ obj = drm_gem_object_lookup(dev, file_priv, args->handle); ++ if (obj == NULL) ++ return -EINVAL; ++ obj_priv = obj->driver_private; ++ ++ mutex_lock(&dev->struct_mutex); ++ ++ args->tiling_mode = obj_priv->tiling_mode; ++ switch (obj_priv->tiling_mode) { ++ case I915_TILING_X: ++ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; ++ break; ++ case I915_TILING_Y: ++ args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; ++ break; ++ case I915_TILING_NONE: ++ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; ++ break; ++ default: ++ DRM_ERROR("unknown tiling mode\n"); ++ } ++ ++ mutex_unlock(&dev->struct_mutex); ++ ++ drm_gem_object_unreference(obj); ++ ++ return 0; ++} +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -281,8 +281,10 @@ irqreturn_t i915_driver_irq_handler(DRM_ + + dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); + +- if (iir & I915_USER_INTERRUPT) ++ if (iir & I915_USER_INTERRUPT) { ++ dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + DRM_WAKEUP(&dev_priv->irq_queue); ++ } + + if (iir & (I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | + I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT)) { +@@ -343,7 +345,7 @@ static int i915_emit_irq(struct drm_devi + return dev_priv->counter; + } + +-static void i915_user_irq_get(struct drm_device *dev) ++void i915_user_irq_get(struct drm_device *dev) + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + +@@ -353,7 +355,7 @@ static void i915_user_irq_get(struct drm + spin_unlock(&dev_priv->user_irq_lock); + } + +-static void i915_user_irq_put(struct drm_device *dev) ++void i915_user_irq_put(struct drm_device *dev) + { + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -25,19 +25,6 @@ + #ifndef _I915_REG_H_ + #define _I915_REG_H_ + +-/* MCH MMIO space */ +-/** 915-945 and GM965 MCH register controlling DRAM channel access */ +-#define DCC 0x200 +-#define DCC_ADDRESSING_MODE_SINGLE_CHANNEL (0 << 0) +-#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC (1 << 0) +-#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED (2 << 0) +-#define DCC_ADDRESSING_MODE_MASK (3 << 0) +-#define DCC_CHANNEL_XOR_DISABLE (1 << 10) +- +-/** 965 MCH register controlling DRAM channel configuration */ +-#define CHDECMISC 0x111 +-#define CHDECMISC_FLEXMEMORY (1 << 1) +- + /* + * The Bridge device's PCI config space has information about the + * fb aperture size and the amount of pre-reserved memory. +@@ -516,6 +503,30 @@ + #define PALETTE_A 0x0a000 + #define PALETTE_B 0x0a800 + ++/* MCH MMIO space */ ++ ++/* ++ * MCHBAR mirror. ++ * ++ * This mirrors the MCHBAR MMIO space whose location is determined by ++ * device 0 function 0's pci config register 0x44 or 0x48 and matches it in ++ * every way. It is not accessible from the CP register read instructions. ++ * ++ */ ++/** 915-945 and GM965 MCH register controlling DRAM channel access */ ++#define MCHBAR_MIRROR_BASE 0x10000 ++ ++#define DCC 0x10200 ++#define DCC_ADDRESSING_MODE_SINGLE_CHANNEL (0 << 0) ++#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC (1 << 0) ++#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED (2 << 0) ++#define DCC_ADDRESSING_MODE_MASK (3 << 0) ++#define DCC_CHANNEL_XOR_DISABLE (1 << 10) ++ ++/** 965 MCH register controlling DRAM channel configuration */ ++#define CHDECMISC 0x10111 ++#define CHDECMISC_FLEXMEMORY (1 << 1) ++ + /* + * Overlay regs + */ +--- a/include/drm/drm.h ++++ b/include/drm/drm.h +@@ -573,6 +573,34 @@ struct drm_set_version { + int drm_dd_minor; + }; + ++/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ ++struct drm_gem_close { ++ /** Handle of the object to be closed. */ ++ uint32_t handle; ++ uint32_t pad; ++}; ++ ++/** DRM_IOCTL_GEM_FLINK ioctl argument type */ ++struct drm_gem_flink { ++ /** Handle for the object being named */ ++ uint32_t handle; ++ ++ /** Returned global name */ ++ uint32_t name; ++}; ++ ++/** DRM_IOCTL_GEM_OPEN ioctl argument type */ ++struct drm_gem_open { ++ /** Name of object being opened */ ++ uint32_t name; ++ ++ /** Returned handle for the object */ ++ uint32_t handle; ++ ++ /** Returned size of the object */ ++ uint64_t size; ++}; ++ + #define DRM_IOCTL_BASE 'd' + #define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) + #define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +@@ -587,6 +615,9 @@ struct drm_set_version { + #define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) + #define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) + #define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) ++#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) ++#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) ++#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) + + #define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) + #define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) +--- a/include/drm/drmP.h ++++ b/include/drm/drmP.h +@@ -104,6 +104,7 @@ struct drm_device; + #define DRIVER_DMA_QUEUE 0x200 + #define DRIVER_FB_DMA 0x400 + #define DRIVER_IRQ_VBL2 0x800 ++#define DRIVER_GEM 0x1000 + + /***********************************************************************/ + /** \name Begin the DRM... */ +@@ -387,6 +388,10 @@ struct drm_file { + struct drm_minor *minor; + int remove_auth_on_close; + unsigned long lock_count; ++ /** Mapping of mm object handles to object pointers. */ ++ struct idr object_idr; ++ /** Lock for synchronization of access to object_idr. */ ++ spinlock_t table_lock; + struct file *filp; + void *driver_priv; + }; +@@ -558,6 +563,56 @@ struct drm_ati_pcigart_info { + }; + + /** ++ * This structure defines the drm_mm memory object, which will be used by the ++ * DRM for its buffer objects. ++ */ ++struct drm_gem_object { ++ /** Reference count of this object */ ++ struct kref refcount; ++ ++ /** Handle count of this object. Each handle also holds a reference */ ++ struct kref handlecount; ++ ++ /** Related drm device */ ++ struct drm_device *dev; ++ ++ /** File representing the shmem storage */ ++ struct file *filp; ++ ++ /** ++ * Size of the object, in bytes. Immutable over the object's ++ * lifetime. ++ */ ++ size_t size; ++ ++ /** ++ * Global name for this object, starts at 1. 0 means unnamed. ++ * Access is covered by the object_name_lock in the related drm_device ++ */ ++ int name; ++ ++ /** ++ * Memory domains. These monitor which caches contain read/write data ++ * related to the object. When transitioning from one set of domains ++ * to another, the driver is called to ensure that caches are suitably ++ * flushed and invalidated ++ */ ++ uint32_t read_domains; ++ uint32_t write_domain; ++ ++ /** ++ * While validating an exec operation, the ++ * new read/write domain values are computed here. ++ * They will be transferred to the above values ++ * at the point that any cache flushing occurs ++ */ ++ uint32_t pending_read_domains; ++ uint32_t pending_write_domain; ++ ++ void *driver_private; ++}; ++ ++/** + * DRM driver structure. This structure represent the common code for + * a family of cards. There will one drm_device for each card present + * in this family +@@ -614,6 +669,18 @@ struct drm_driver { + void (*set_version) (struct drm_device *dev, + struct drm_set_version *sv); + ++ int (*proc_init)(struct drm_minor *minor); ++ void (*proc_cleanup)(struct drm_minor *minor); ++ ++ /** ++ * Driver-specific constructor for drm_gem_objects, to set up ++ * obj->driver_private. ++ * ++ * Returns 0 on success. ++ */ ++ int (*gem_init_object) (struct drm_gem_object *obj); ++ void (*gem_free_object) (struct drm_gem_object *obj); ++ + int major; + int minor; + int patchlevel; +@@ -771,6 +838,22 @@ struct drm_device { + spinlock_t drw_lock; + struct idr drw_idr; + /*@} */ ++ ++ /** \name GEM information */ ++ /*@{ */ ++ spinlock_t object_name_lock; ++ struct idr object_name_idr; ++ atomic_t object_count; ++ atomic_t object_memory; ++ atomic_t pin_count; ++ atomic_t pin_memory; ++ atomic_t gtt_count; ++ atomic_t gtt_memory; ++ uint32_t gtt_total; ++ uint32_t invalidate_domains; /* domains pending invalidation */ ++ uint32_t flush_domains; /* domains pending flush */ ++ /*@} */ ++ + }; + + static __inline__ int drm_core_check_feature(struct drm_device *dev, +@@ -867,6 +950,10 @@ extern void *drm_realloc(void *oldpt, si + extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); + extern int drm_free_agp(DRM_AGP_MEM * handle, int pages); + extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); ++extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, ++ struct page **pages, ++ unsigned long num_pages, ++ uint32_t gtt_offset); + extern int drm_unbind_agp(DRM_AGP_MEM * handle); + + /* Misc. IOCTL support (drm_ioctl.h) */ +@@ -929,6 +1016,9 @@ extern int drm_getmagic(struct drm_devic + extern int drm_authmagic(struct drm_device *dev, void *data, + struct drm_file *file_priv); + ++/* Cache management (drm_cache.c) */ ++void drm_clflush_pages(struct page *pages[], unsigned long num_pages); ++ + /* Locking IOCTL support (drm_lock.h) */ + extern int drm_lock(struct drm_device *dev, void *data, + struct drm_file *file_priv); +@@ -1026,6 +1116,7 @@ extern DRM_AGP_MEM *drm_agp_allocate_mem + extern int drm_agp_free_memory(DRM_AGP_MEM * handle); + extern int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start); + extern int drm_agp_unbind_memory(DRM_AGP_MEM * handle); ++extern void drm_agp_chipset_flush(struct drm_device *dev); + + /* Stub support (drm_stub.h) */ + extern int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent, +@@ -1088,6 +1179,66 @@ extern unsigned long drm_mm_tail_space(s + extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size); + extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size); + ++/* Graphics Execution Manager library functions (drm_gem.c) */ ++int drm_gem_init(struct drm_device *dev); ++void drm_gem_object_free(struct kref *kref); ++struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, ++ size_t size); ++void drm_gem_object_handle_free(struct kref *kref); ++ ++static inline void ++drm_gem_object_reference(struct drm_gem_object *obj) ++{ ++ kref_get(&obj->refcount); ++} ++ ++static inline void ++drm_gem_object_unreference(struct drm_gem_object *obj) ++{ ++ if (obj == NULL) ++ return; ++ ++ kref_put(&obj->refcount, drm_gem_object_free); ++} ++ ++int drm_gem_handle_create(struct drm_file *file_priv, ++ struct drm_gem_object *obj, ++ int *handlep); ++ ++static inline void ++drm_gem_object_handle_reference(struct drm_gem_object *obj) ++{ ++ drm_gem_object_reference(obj); ++ kref_get(&obj->handlecount); ++} ++ ++static inline void ++drm_gem_object_handle_unreference(struct drm_gem_object *obj) ++{ ++ if (obj == NULL) ++ return; ++ ++ /* ++ * Must bump handle count first as this may be the last ++ * ref, in which case the object would disappear before we ++ * checked for a name ++ */ ++ kref_put(&obj->handlecount, drm_gem_object_handle_free); ++ drm_gem_object_unreference(obj); ++} ++ ++struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev, ++ struct drm_file *filp, ++ int handle); ++int drm_gem_close_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int drm_gem_flink_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++int drm_gem_open_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++void drm_gem_open(struct drm_device *dev, struct drm_file *file_private); ++void drm_gem_release(struct drm_device *dev, struct drm_file *file_private); ++ + extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev); + extern void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev); + extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev); +--- a/include/drm/i915_drm.h ++++ b/include/drm/i915_drm.h +@@ -143,6 +143,22 @@ typedef struct _drm_i915_sarea { + #define DRM_I915_GET_VBLANK_PIPE 0x0e + #define DRM_I915_VBLANK_SWAP 0x0f + #define DRM_I915_HWS_ADDR 0x11 ++#define DRM_I915_GEM_INIT 0x13 ++#define DRM_I915_GEM_EXECBUFFER 0x14 ++#define DRM_I915_GEM_PIN 0x15 ++#define DRM_I915_GEM_UNPIN 0x16 ++#define DRM_I915_GEM_BUSY 0x17 ++#define DRM_I915_GEM_THROTTLE 0x18 ++#define DRM_I915_GEM_ENTERVT 0x19 ++#define DRM_I915_GEM_LEAVEVT 0x1a ++#define DRM_I915_GEM_CREATE 0x1b ++#define DRM_I915_GEM_PREAD 0x1c ++#define DRM_I915_GEM_PWRITE 0x1d ++#define DRM_I915_GEM_MMAP 0x1e ++#define DRM_I915_GEM_SET_DOMAIN 0x1f ++#define DRM_I915_GEM_SW_FINISH 0x20 ++#define DRM_I915_GEM_SET_TILING 0x21 ++#define DRM_I915_GEM_GET_TILING 0x22 + + #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) + #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) +@@ -160,6 +176,20 @@ typedef struct _drm_i915_sarea { + #define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) + #define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) + #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) ++#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) ++#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) ++#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) ++#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) ++#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) ++#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) ++#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) ++#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) ++#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) ++#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) ++#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) ++#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) ++#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) ++#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) + + /* Allow drivers to submit batchbuffers directly to hardware, relying + * on the security mechanisms provided by hardware. +@@ -200,6 +230,7 @@ typedef struct drm_i915_irq_wait { + #define I915_PARAM_IRQ_ACTIVE 1 + #define I915_PARAM_ALLOW_BATCHBUFFER 2 + #define I915_PARAM_LAST_DISPATCH 3 ++#define I915_PARAM_HAS_GEM 5 + + typedef struct drm_i915_getparam { + int param; +@@ -267,4 +298,305 @@ typedef struct drm_i915_hws_addr { + uint64_t addr; + } drm_i915_hws_addr_t; + ++struct drm_i915_gem_init { ++ /** ++ * Beginning offset in the GTT to be managed by the DRM memory ++ * manager. ++ */ ++ uint64_t gtt_start; ++ /** ++ * Ending offset in the GTT to be managed by the DRM memory ++ * manager. ++ */ ++ uint64_t gtt_end; ++}; ++ ++struct drm_i915_gem_create { ++ /** ++ * Requested size for the object. ++ * ++ * The (page-aligned) allocated size for the object will be returned. ++ */ ++ uint64_t size; ++ /** ++ * Returned handle for the object. ++ * ++ * Object handles are nonzero. ++ */ ++ uint32_t handle; ++ uint32_t pad; ++}; ++ ++struct drm_i915_gem_pread { ++ /** Handle for the object being read. */ ++ uint32_t handle; ++ uint32_t pad; ++ /** Offset into the object to read from */ ++ uint64_t offset; ++ /** Length of data to read */ ++ uint64_t size; ++ /** ++ * Pointer to write the data into. ++ * ++ * This is a fixed-size type for 32/64 compatibility. ++ */ ++ uint64_t data_ptr; ++}; ++ ++struct drm_i915_gem_pwrite { ++ /** Handle for the object being written to. */ ++ uint32_t handle; ++ uint32_t pad; ++ /** Offset into the object to write to */ ++ uint64_t offset; ++ /** Length of data to write */ ++ uint64_t size; ++ /** ++ * Pointer to read the data from. ++ * ++ * This is a fixed-size type for 32/64 compatibility. ++ */ ++ uint64_t data_ptr; ++}; ++ ++struct drm_i915_gem_mmap { ++ /** Handle for the object being mapped. */ ++ uint32_t handle; ++ uint32_t pad; ++ /** Offset in the object to map. */ ++ uint64_t offset; ++ /** ++ * Length of data to map. ++ * ++ * The value will be page-aligned. ++ */ ++ uint64_t size; ++ /** ++ * Returned pointer the data was mapped at. ++ * ++ * This is a fixed-size type for 32/64 compatibility. ++ */ ++ uint64_t addr_ptr; ++}; ++ ++struct drm_i915_gem_set_domain { ++ /** Handle for the object */ ++ uint32_t handle; ++ ++ /** New read domains */ ++ uint32_t read_domains; ++ ++ /** New write domain */ ++ uint32_t write_domain; ++}; ++ ++struct drm_i915_gem_sw_finish { ++ /** Handle for the object */ ++ uint32_t handle; ++}; ++ ++struct drm_i915_gem_relocation_entry { ++ /** ++ * Handle of the buffer being pointed to by this relocation entry. ++ * ++ * It's appealing to make this be an index into the mm_validate_entry ++ * list to refer to the buffer, but this allows the driver to create ++ * a relocation list for state buffers and not re-write it per ++ * exec using the buffer. ++ */ ++ uint32_t target_handle; ++ ++ /** ++ * Value to be added to the offset of the target buffer to make up ++ * the relocation entry. ++ */ ++ uint32_t delta; ++ ++ /** Offset in the buffer the relocation entry will be written into */ ++ uint64_t offset; ++ ++ /** ++ * Offset value of the target buffer that the relocation entry was last ++ * written as. ++ * ++ * If the buffer has the same offset as last time, we can skip syncing ++ * and writing the relocation. This value is written back out by ++ * the execbuffer ioctl when the relocation is written. ++ */ ++ uint64_t presumed_offset; ++ ++ /** ++ * Target memory domains read by this operation. ++ */ ++ uint32_t read_domains; ++ ++ /** ++ * Target memory domains written by this operation. ++ * ++ * Note that only one domain may be written by the whole ++ * execbuffer operation, so that where there are conflicts, ++ * the application will get -EINVAL back. ++ */ ++ uint32_t write_domain; ++}; ++ ++/** @{ ++ * Intel memory domains ++ * ++ * Most of these just align with the various caches in ++ * the system and are used to flush and invalidate as ++ * objects end up cached in different domains. ++ */ ++/** CPU cache */ ++#define I915_GEM_DOMAIN_CPU 0x00000001 ++/** Render cache, used by 2D and 3D drawing */ ++#define I915_GEM_DOMAIN_RENDER 0x00000002 ++/** Sampler cache, used by texture engine */ ++#define I915_GEM_DOMAIN_SAMPLER 0x00000004 ++/** Command queue, used to load batch buffers */ ++#define I915_GEM_DOMAIN_COMMAND 0x00000008 ++/** Instruction cache, used by shader programs */ ++#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 ++/** Vertex address cache */ ++#define I915_GEM_DOMAIN_VERTEX 0x00000020 ++/** GTT domain - aperture and scanout */ ++#define I915_GEM_DOMAIN_GTT 0x00000040 ++/** @} */ ++ ++struct drm_i915_gem_exec_object { ++ /** ++ * User's handle for a buffer to be bound into the GTT for this ++ * operation. ++ */ ++ uint32_t handle; ++ ++ /** Number of relocations to be performed on this buffer */ ++ uint32_t relocation_count; ++ /** ++ * Pointer to array of struct drm_i915_gem_relocation_entry containing ++ * the relocations to be performed in this buffer. ++ */ ++ uint64_t relocs_ptr; ++ ++ /** Required alignment in graphics aperture */ ++ uint64_t alignment; ++ ++ /** ++ * Returned value of the updated offset of the object, for future ++ * presumed_offset writes. ++ */ ++ uint64_t offset; ++}; ++ ++struct drm_i915_gem_execbuffer { ++ /** ++ * List of buffers to be validated with their relocations to be ++ * performend on them. ++ * ++ * This is a pointer to an array of struct drm_i915_gem_validate_entry. ++ * ++ * These buffers must be listed in an order such that all relocations ++ * a buffer is performing refer to buffers that have already appeared ++ * in the validate list. ++ */ ++ uint64_t buffers_ptr; ++ uint32_t buffer_count; ++ ++ /** Offset in the batchbuffer to start execution from. */ ++ uint32_t batch_start_offset; ++ /** Bytes used in batchbuffer from batch_start_offset */ ++ uint32_t batch_len; ++ uint32_t DR1; ++ uint32_t DR4; ++ uint32_t num_cliprects; ++ /** This is a struct drm_clip_rect *cliprects */ ++ uint64_t cliprects_ptr; ++}; ++ ++struct drm_i915_gem_pin { ++ /** Handle of the buffer to be pinned. */ ++ uint32_t handle; ++ uint32_t pad; ++ ++ /** alignment required within the aperture */ ++ uint64_t alignment; ++ ++ /** Returned GTT offset of the buffer. */ ++ uint64_t offset; ++}; ++ ++struct drm_i915_gem_unpin { ++ /** Handle of the buffer to be unpinned. */ ++ uint32_t handle; ++ uint32_t pad; ++}; ++ ++struct drm_i915_gem_busy { ++ /** Handle of the buffer to check for busy */ ++ uint32_t handle; ++ ++ /** Return busy status (1 if busy, 0 if idle) */ ++ uint32_t busy; ++}; ++ ++#define I915_TILING_NONE 0 ++#define I915_TILING_X 1 ++#define I915_TILING_Y 2 ++ ++#define I915_BIT_6_SWIZZLE_NONE 0 ++#define I915_BIT_6_SWIZZLE_9 1 ++#define I915_BIT_6_SWIZZLE_9_10 2 ++#define I915_BIT_6_SWIZZLE_9_11 3 ++#define I915_BIT_6_SWIZZLE_9_10_11 4 ++/* Not seen by userland */ ++#define I915_BIT_6_SWIZZLE_UNKNOWN 5 ++ ++struct drm_i915_gem_set_tiling { ++ /** Handle of the buffer to have its tiling state updated */ ++ uint32_t handle; ++ ++ /** ++ * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, ++ * I915_TILING_Y). ++ * ++ * This value is to be set on request, and will be updated by the ++ * kernel on successful return with the actual chosen tiling layout. ++ * ++ * The tiling mode may be demoted to I915_TILING_NONE when the system ++ * has bit 6 swizzling that can't be managed correctly by GEM. ++ * ++ * Buffer contents become undefined when changing tiling_mode. ++ */ ++ uint32_t tiling_mode; ++ ++ /** ++ * Stride in bytes for the object when in I915_TILING_X or ++ * I915_TILING_Y. ++ */ ++ uint32_t stride; ++ ++ /** ++ * Returned address bit 6 swizzling required for CPU access through ++ * mmap mapping. ++ */ ++ uint32_t swizzle_mode; ++}; ++ ++struct drm_i915_gem_get_tiling { ++ /** Handle of the buffer to get tiling state for. */ ++ uint32_t handle; ++ ++ /** ++ * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, ++ * I915_TILING_Y). ++ */ ++ uint32_t tiling_mode; ++ ++ /** ++ * Returned address bit 6 swizzling required for CPU access through ++ * mmap mapping. ++ */ ++ uint32_t swizzle_mode; ++}; ++ + #endif /* _I915_DRM_H_ */ + diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0009-squashfs3.3-2.6.27.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0009-squashfs3.3-2.6.27.patch new file mode 100644 index 000000000..4de9839c7 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0009-squashfs3.3-2.6.27.patch @@ -0,0 +1,6727 @@ +diff -uNr a/fs/Kconfig b/fs/Kconfig +--- a/fs/Kconfig 2008-07-28 19:40:31.000000000 -0700 ++++ b/fs/Kconfig 2008-08-13 16:19:56.000000000 -0700 +@@ -1348,6 +1348,56 @@ + + If unsure, say N. + ++config SQUASHFS ++ tristate "SquashFS 3.3 - Squashed file system support" ++ select ZLIB_INFLATE ++ help ++ Saying Y here includes support for SquashFS 3.3 (a Compressed ++ Read-Only File System). Squashfs is a highly compressed read-only ++ filesystem for Linux. It uses zlib compression to compress both ++ files, inodes and directories. Inodes in the system are very small ++ and all blocks are packed to minimise data overhead. Block sizes ++ greater than 4K are supported up to a maximum of 1 Mbytes (default ++ block size 128K). SquashFS 3.3 supports 64 bit filesystems and files ++ (larger than 4GB), full uid/gid information, hard links and timestamps. ++ ++ Squashfs is intended for general read-only filesystem use, for ++ archival use (i.e. in cases where a .tar.gz file may be used), and in ++ embedded systems where low overhead is needed. Further information ++ and filesystem tools are available from http://squashfs.sourceforge.net. ++ ++ If you want to compile this as a module ( = code which can be ++ inserted in and removed from the running kernel whenever you want), ++ say M here and read <file:Documentation/modules.txt>. The module ++ will be called squashfs. Note that the root file system (the one ++ containing the directory /) cannot be compiled as a module. ++ ++ If unsure, say N. ++ ++config SQUASHFS_EMBEDDED ++ ++ bool "Additional option for memory-constrained systems" ++ depends on SQUASHFS ++ default n ++ help ++ Saying Y here allows you to specify cache size. ++ ++ If unsure, say N. ++ ++config SQUASHFS_FRAGMENT_CACHE_SIZE ++ int "Number of fragments cached" if SQUASHFS_EMBEDDED ++ depends on SQUASHFS ++ default "3" ++ help ++ By default SquashFS caches the last 3 fragments read from ++ the filesystem. Increasing this amount may mean SquashFS ++ has to re-read fragments less often from disk, at the expense ++ of extra system memory. Decreasing this amount will mean ++ SquashFS uses less memory at the expense of extra reads from disk. ++ ++ Note there must be at least one cached fragment. Anything ++ much more than three will probably not make much difference. ++ + config VXFS_FS + tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + depends on BLOCK +diff -uNr a/fs/Kconfig.orig b/fs/Kconfig.orig +--- a/fs/Kconfig.orig 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/Kconfig.orig 2008-07-28 19:40:31.000000000 -0700 +@@ -0,0 +1,2097 @@ ++# ++# File system configuration ++# ++ ++menu "File systems" ++ ++if BLOCK ++ ++config EXT2_FS ++ tristate "Second extended fs support" ++ help ++ Ext2 is a standard Linux file system for hard disks. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ext2. ++ ++ If unsure, say Y. ++ ++config EXT2_FS_XATTR ++ bool "Ext2 extended attributes" ++ depends on EXT2_FS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). ++ ++ If unsure, say N. ++ ++config EXT2_FS_POSIX_ACL ++ bool "Ext2 POSIX Access Control Lists" ++ depends on EXT2_FS_XATTR ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config EXT2_FS_SECURITY ++ bool "Ext2 Security Labels" ++ depends on EXT2_FS_XATTR ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the ext2 filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config EXT2_FS_XIP ++ bool "Ext2 execute in place support" ++ depends on EXT2_FS && MMU ++ help ++ Execute in place can be used on memory-backed block devices. If you ++ enable this option, you can select to mount block devices which are ++ capable of this feature without using the page cache. ++ ++ If you do not use a block device that is capable of using this, ++ or if unsure, say N. ++ ++config FS_XIP ++# execute in place ++ bool ++ depends on EXT2_FS_XIP ++ default y ++ ++config EXT3_FS ++ tristate "Ext3 journalling file system support" ++ select JBD ++ help ++ This is the journalling version of the Second extended file system ++ (often called ext3), the de facto standard Linux file system ++ (method to organize files on a storage device) for hard disks. ++ ++ The journalling code included in this driver means you do not have ++ to run e2fsck (file system checker) on your file systems after a ++ crash. The journal keeps track of any changes that were being made ++ at the time the system crashed, and can ensure that your file system ++ is consistent without the need for a lengthy check. ++ ++ Other than adding the journal to the file system, the on-disk format ++ of ext3 is identical to ext2. It is possible to freely switch ++ between using the ext3 driver and the ext2 driver, as long as the ++ file system has been cleanly unmounted, or e2fsck is run on the file ++ system. ++ ++ To add a journal on an existing ext2 file system or change the ++ behavior of ext3 file systems, you can use the tune2fs utility ("man ++ tune2fs"). To modify attributes of files and directories on ext3 ++ file systems, use chattr ("man chattr"). You need to be using ++ e2fsprogs version 1.20 or later in order to create ext3 journals ++ (available at <http://sourceforge.net/projects/e2fsprogs/>). ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ext3. ++ ++config EXT3_FS_XATTR ++ bool "Ext3 extended attributes" ++ depends on EXT3_FS ++ default y ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). ++ ++ If unsure, say N. ++ ++ You need this for POSIX ACL support on ext3. ++ ++config EXT3_FS_POSIX_ACL ++ bool "Ext3 POSIX Access Control Lists" ++ depends on EXT3_FS_XATTR ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config EXT3_FS_SECURITY ++ bool "Ext3 Security Labels" ++ depends on EXT3_FS_XATTR ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the ext3 filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config EXT4DEV_FS ++ tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ select JBD2 ++ select CRC16 ++ help ++ Ext4dev is a predecessor filesystem of the next generation ++ extended fs ext4, based on ext3 filesystem code. It will be ++ renamed ext4 fs later, once ext4dev is mature and stabilized. ++ ++ Unlike the change from ext2 filesystem to ext3 filesystem, ++ the on-disk format of ext4dev is not the same as ext3 any more: ++ it is based on extent maps and it supports 48-bit physical block ++ numbers. These combined on-disk format changes will allow ++ ext4dev/ext4 to handle more than 16 TB filesystem volumes -- ++ a hard limit that ext3 cannot overcome without changing the ++ on-disk format. ++ ++ Other than extent maps and 48-bit block numbers, ext4dev also is ++ likely to have other new features such as persistent preallocation, ++ high resolution time stamps, and larger file support etc. These ++ features will be added to ext4dev gradually. ++ ++ To compile this file system support as a module, choose M here. The ++ module will be called ext4dev. ++ ++ If unsure, say N. ++ ++config EXT4DEV_FS_XATTR ++ bool "Ext4dev extended attributes" ++ depends on EXT4DEV_FS ++ default y ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). ++ ++ If unsure, say N. ++ ++ You need this for POSIX ACL support on ext4dev/ext4. ++ ++config EXT4DEV_FS_POSIX_ACL ++ bool "Ext4dev POSIX Access Control Lists" ++ depends on EXT4DEV_FS_XATTR ++ select FS_POSIX_ACL ++ help ++ POSIX Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the POSIX ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config EXT4DEV_FS_SECURITY ++ bool "Ext4dev Security Labels" ++ depends on EXT4DEV_FS_XATTR ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the ext4dev/ext4 filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config JBD ++ tristate ++ help ++ This is a generic journalling layer for block devices. It is ++ currently used by the ext3 and OCFS2 file systems, but it could ++ also be used to add journal support to other file systems or block ++ devices such as RAID or LVM. ++ ++ If you are using the ext3 or OCFS2 file systems, you need to ++ say Y here. If you are not using ext3 OCFS2 then you will probably ++ want to say N. ++ ++ To compile this device as a module, choose M here: the module will be ++ called jbd. If you are compiling ext3 or OCFS2 into the kernel, ++ you cannot compile this code as a module. ++ ++config JBD_DEBUG ++ bool "JBD (ext3) debugging support" ++ depends on JBD && DEBUG_FS ++ help ++ If you are using the ext3 journaled file system (or potentially any ++ other file system/device using JBD), this option allows you to ++ enable debugging output while the system is running, in order to ++ help track down any problems you are having. By default the ++ debugging output will be turned off. ++ ++ If you select Y here, then you will be able to turn on debugging ++ with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a ++ number between 1 and 5, the higher the number, the more debugging ++ output is generated. To turn debugging off again, do ++ "echo 0 > /sys/kernel/debug/jbd/jbd-debug". ++ ++config JBD2 ++ tristate ++ select CRC32 ++ help ++ This is a generic journaling layer for block devices that support ++ both 32-bit and 64-bit block numbers. It is currently used by ++ the ext4dev/ext4 filesystem, but it could also be used to add ++ journal support to other file systems or block devices such ++ as RAID or LVM. ++ ++ If you are using ext4dev/ext4, you need to say Y here. If you are not ++ using ext4dev/ext4 then you will probably want to say N. ++ ++ To compile this device as a module, choose M here. The module will be ++ called jbd2. If you are compiling ext4dev/ext4 into the kernel, ++ you cannot compile this code as a module. ++ ++config JBD2_DEBUG ++ bool "JBD2 (ext4dev/ext4) debugging support" ++ depends on JBD2 && DEBUG_FS ++ help ++ If you are using the ext4dev/ext4 journaled file system (or ++ potentially any other filesystem/device using JBD2), this option ++ allows you to enable debugging output while the system is running, ++ in order to help track down any problems you are having. ++ By default, the debugging output will be turned off. ++ ++ If you select Y here, then you will be able to turn on debugging ++ with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a ++ number between 1 and 5. The higher the number, the more debugging ++ output is generated. To turn debugging off again, do ++ "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". ++ ++config FS_MBCACHE ++# Meta block cache for Extended Attributes (ext2/ext3/ext4) ++ tristate ++ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR ++ default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y ++ default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m ++ ++config REISERFS_FS ++ tristate "Reiserfs support" ++ help ++ Stores not just filenames but the files themselves in a balanced ++ tree. Uses journalling. ++ ++ Balanced trees are more efficient than traditional file system ++ architectural foundations. ++ ++ In general, ReiserFS is as fast as ext2, but is very efficient with ++ large directories and small files. Additional patches are needed ++ for NFS and quotas, please see <http://www.namesys.com/> for links. ++ ++ It is more easily extended to have features currently found in ++ database and keyword search systems than block allocation based file ++ systems are. The next version will be so extended, and will support ++ plugins consistent with our motto ``It takes more than a license to ++ make source code open.'' ++ ++ Read <http://www.namesys.com/> to learn more about reiserfs. ++ ++ Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. ++ ++ If you like it, you can pay us to add new features to it that you ++ need, buy a support contract, or pay us to port it to another OS. ++ ++config REISERFS_CHECK ++ bool "Enable reiserfs debug mode" ++ depends on REISERFS_FS ++ help ++ If you set this to Y, then ReiserFS will perform every check it can ++ possibly imagine of its internal consistency throughout its ++ operation. It will also go substantially slower. More than once we ++ have forgotten that this was on, and then gone despondent over the ++ latest benchmarks.:-) Use of this option allows our team to go all ++ out in checking for consistency when debugging without fear of its ++ effect on end users. If you are on the verge of sending in a bug ++ report, say Y and you might get a useful error message. Almost ++ everyone should say N. ++ ++config REISERFS_PROC_INFO ++ bool "Stats in /proc/fs/reiserfs" ++ depends on REISERFS_FS && PROC_FS ++ help ++ Create under /proc/fs/reiserfs a hierarchy of files, displaying ++ various ReiserFS statistics and internal data at the expense of ++ making your kernel or module slightly larger (+8 KB). This also ++ increases the amount of kernel memory required for each mount. ++ Almost everyone but ReiserFS developers and people fine-tuning ++ reiserfs or tracing problems should say N. ++ ++config REISERFS_FS_XATTR ++ bool "ReiserFS extended attributes" ++ depends on REISERFS_FS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). ++ ++ If unsure, say N. ++ ++config REISERFS_FS_POSIX_ACL ++ bool "ReiserFS POSIX Access Control Lists" ++ depends on REISERFS_FS_XATTR ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config REISERFS_FS_SECURITY ++ bool "ReiserFS Security Labels" ++ depends on REISERFS_FS_XATTR ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the ReiserFS filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config JFS_FS ++ tristate "JFS filesystem support" ++ select NLS ++ help ++ This is a port of IBM's Journaled Filesystem . More information is ++ available in the file <file:Documentation/filesystems/jfs.txt>. ++ ++ If you do not intend to use the JFS filesystem, say N. ++ ++config JFS_POSIX_ACL ++ bool "JFS POSIX Access Control Lists" ++ depends on JFS_FS ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config JFS_SECURITY ++ bool "JFS Security Labels" ++ depends on JFS_FS ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the jfs filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config JFS_DEBUG ++ bool "JFS debugging" ++ depends on JFS_FS ++ help ++ If you are experiencing any problems with the JFS filesystem, say ++ Y here. This will result in additional debugging messages to be ++ written to the system log. Under normal circumstances, this ++ results in very little overhead. ++ ++config JFS_STATISTICS ++ bool "JFS statistics" ++ depends on JFS_FS ++ help ++ Enabling this option will cause statistics from the JFS file system ++ to be made available to the user in the /proc/fs/jfs/ directory. ++ ++config FS_POSIX_ACL ++# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) ++# ++# NOTE: you can implement Posix ACLs without these helpers (XFS does). ++# Never use this symbol for ifdefs. ++# ++ bool ++ default n ++ ++source "fs/xfs/Kconfig" ++source "fs/gfs2/Kconfig" ++ ++config OCFS2_FS ++ tristate "OCFS2 file system support" ++ depends on NET && SYSFS ++ select CONFIGFS_FS ++ select JBD ++ select CRC32 ++ help ++ OCFS2 is a general purpose extent based shared disk cluster file ++ system with many similarities to ext3. It supports 64 bit inode ++ numbers, and has automatically extending metadata groups which may ++ also make it attractive for non-clustered use. ++ ++ You'll want to install the ocfs2-tools package in order to at least ++ get "mount.ocfs2". ++ ++ Project web page: http://oss.oracle.com/projects/ocfs2 ++ Tools web page: http://oss.oracle.com/projects/ocfs2-tools ++ OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ ++ ++ For more information on OCFS2, see the file ++ <file:Documentation/filesystems/ocfs2.txt>. ++ ++config OCFS2_FS_O2CB ++ tristate "O2CB Kernelspace Clustering" ++ depends on OCFS2_FS ++ default y ++ help ++ OCFS2 includes a simple kernelspace clustering package, the OCFS2 ++ Cluster Base. It only requires a very small userspace component ++ to configure it. This comes with the standard ocfs2-tools package. ++ O2CB is limited to maintaining a cluster for OCFS2 file systems. ++ It cannot manage any other cluster applications. ++ ++ It is always safe to say Y here, as the clustering method is ++ run-time selectable. ++ ++config OCFS2_FS_USERSPACE_CLUSTER ++ tristate "OCFS2 Userspace Clustering" ++ depends on OCFS2_FS && DLM ++ default y ++ help ++ This option will allow OCFS2 to use userspace clustering services ++ in conjunction with the DLM in fs/dlm. If you are using a ++ userspace cluster manager, say Y here. ++ ++ It is safe to say Y, as the clustering method is run-time ++ selectable. ++ ++config OCFS2_FS_STATS ++ bool "OCFS2 statistics" ++ depends on OCFS2_FS ++ default y ++ help ++ This option allows some fs statistics to be captured. Enabling ++ this option may increase the memory consumption. ++ ++config OCFS2_DEBUG_MASKLOG ++ bool "OCFS2 logging support" ++ depends on OCFS2_FS ++ default y ++ help ++ The ocfs2 filesystem has an extensive logging system. The system ++ allows selection of events to log via files in /sys/o2cb/logmask/. ++ This option will enlarge your kernel, but it allows debugging of ++ ocfs2 filesystem issues. ++ ++config OCFS2_DEBUG_FS ++ bool "OCFS2 expensive checks" ++ depends on OCFS2_FS ++ default n ++ help ++ This option will enable expensive consistency checks. Enable ++ this option for debugging only as it is likely to decrease ++ performance of the filesystem. ++ ++endif # BLOCK ++ ++config DNOTIFY ++ bool "Dnotify support" ++ default y ++ help ++ Dnotify is a directory-based per-fd file change notification system ++ that uses signals to communicate events to user-space. There exist ++ superior alternatives, but some applications may still rely on ++ dnotify. ++ ++ If unsure, say Y. ++ ++config INOTIFY ++ bool "Inotify file change notification support" ++ default y ++ ---help--- ++ Say Y here to enable inotify support. Inotify is a file change ++ notification system and a replacement for dnotify. Inotify fixes ++ numerous shortcomings in dnotify and introduces several new features ++ including multiple file events, one-shot support, and unmount ++ notification. ++ ++ For more information, see <file:Documentation/filesystems/inotify.txt> ++ ++ If unsure, say Y. ++ ++config INOTIFY_USER ++ bool "Inotify support for userspace" ++ depends on INOTIFY ++ default y ++ ---help--- ++ Say Y here to enable inotify support for userspace, including the ++ associated system calls. Inotify allows monitoring of both files and ++ directories via a single open fd. Events are read from the file ++ descriptor, which is also select()- and poll()-able. ++ ++ For more information, see <file:Documentation/filesystems/inotify.txt> ++ ++ If unsure, say Y. ++ ++config QUOTA ++ bool "Quota support" ++ help ++ If you say Y here, you will be able to set per user limits for disk ++ usage (also called disk quotas). Currently, it works for the ++ ext2, ext3, and reiserfs file system. ext3 also supports journalled ++ quotas for which you don't need to run quotacheck(8) after an unclean ++ shutdown. ++ For further details, read the Quota mini-HOWTO, available from ++ <http://www.tldp.org/docs.html#howto>, or the documentation provided ++ with the quota tools. Probably the quota support is only useful for ++ multi user systems. If unsure, say N. ++ ++config QUOTA_NETLINK_INTERFACE ++ bool "Report quota messages through netlink interface" ++ depends on QUOTA && NET ++ help ++ If you say Y here, quota warnings (about exceeding softlimit, reaching ++ hardlimit, etc.) will be reported through netlink interface. If unsure, ++ say Y. ++ ++config PRINT_QUOTA_WARNING ++ bool "Print quota warnings to console (OBSOLETE)" ++ depends on QUOTA ++ default y ++ help ++ If you say Y here, quota warnings (about exceeding softlimit, reaching ++ hardlimit, etc.) will be printed to the process' controlling terminal. ++ Note that this behavior is currently deprecated and may go away in ++ future. Please use notification via netlink socket instead. ++ ++config QFMT_V1 ++ tristate "Old quota format support" ++ depends on QUOTA ++ help ++ This quota format was (is) used by kernels earlier than 2.4.22. If ++ you have quota working and you don't want to convert to new quota ++ format say Y here. ++ ++config QFMT_V2 ++ tristate "Quota format v2 support" ++ depends on QUOTA ++ help ++ This quota format allows using quotas with 32-bit UIDs/GIDs. If you ++ need this functionality say Y here. ++ ++config QUOTACTL ++ bool ++ depends on XFS_QUOTA || QUOTA ++ default y ++ ++config AUTOFS_FS ++ tristate "Kernel automounter support" ++ help ++ The automounter is a tool to automatically mount remote file systems ++ on demand. This implementation is partially kernel-based to reduce ++ overhead in the already-mounted case; this is unlike the BSD ++ automounter (amd), which is a pure user space daemon. ++ ++ To use the automounter you need the user-space tools from the autofs ++ package; you can find the location in <file:Documentation/Changes>. ++ You also want to answer Y to "NFS file system support", below. ++ ++ If you want to use the newer version of the automounter with more ++ features, say N here and say Y to "Kernel automounter v4 support", ++ below. ++ ++ To compile this support as a module, choose M here: the module will be ++ called autofs. ++ ++ If you are not a part of a fairly large, distributed network, you ++ probably do not need an automounter, and can say N here. ++ ++config AUTOFS4_FS ++ tristate "Kernel automounter version 4 support (also supports v3)" ++ help ++ The automounter is a tool to automatically mount remote file systems ++ on demand. This implementation is partially kernel-based to reduce ++ overhead in the already-mounted case; this is unlike the BSD ++ automounter (amd), which is a pure user space daemon. ++ ++ To use the automounter you need the user-space tools from ++ <ftp://ftp.kernel.org/pub/linux/daemons/autofs/v4/>; you also ++ want to answer Y to "NFS file system support", below. ++ ++ To compile this support as a module, choose M here: the module will be ++ called autofs4. You will need to add "alias autofs autofs4" to your ++ modules configuration file. ++ ++ If you are not a part of a fairly large, distributed network or ++ don't have a laptop which needs to dynamically reconfigure to the ++ local network, you probably do not need an automounter, and can say ++ N here. ++ ++config FUSE_FS ++ tristate "Filesystem in Userspace support" ++ help ++ With FUSE it is possible to implement a fully functional filesystem ++ in a userspace program. ++ ++ There's also companion library: libfuse. This library along with ++ utilities is available from the FUSE homepage: ++ <http://fuse.sourceforge.net/> ++ ++ See <file:Documentation/filesystems/fuse.txt> for more information. ++ See <file:Documentation/Changes> for needed library/utility version. ++ ++ If you want to develop a userspace FS, or if you want to use ++ a filesystem based on FUSE, answer Y or M. ++ ++config GENERIC_ACL ++ bool ++ select FS_POSIX_ACL ++ ++if BLOCK ++menu "CD-ROM/DVD Filesystems" ++ ++config ISO9660_FS ++ tristate "ISO 9660 CDROM file system support" ++ help ++ This is the standard file system used on CD-ROMs. It was previously ++ known as "High Sierra File System" and is called "hsfs" on other ++ Unix systems. The so-called Rock-Ridge extensions which allow for ++ long Unix filenames and symbolic links are also supported by this ++ driver. If you have a CD-ROM drive and want to do more with it than ++ just listen to audio CDs and watch its LEDs, say Y (and read ++ <file:Documentation/filesystems/isofs.txt> and the CD-ROM-HOWTO, ++ available from <http://www.tldp.org/docs.html#howto>), thereby ++ enlarging your kernel by about 27 KB; otherwise say N. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called isofs. ++ ++config JOLIET ++ bool "Microsoft Joliet CDROM extensions" ++ depends on ISO9660_FS ++ select NLS ++ help ++ Joliet is a Microsoft extension for the ISO 9660 CD-ROM file system ++ which allows for long filenames in unicode format (unicode is the ++ new 16 bit character code, successor to ASCII, which encodes the ++ characters of almost all languages of the world; see ++ <http://www.unicode.org/> for more information). Say Y here if you ++ want to be able to read Joliet CD-ROMs under Linux. ++ ++config ZISOFS ++ bool "Transparent decompression extension" ++ depends on ISO9660_FS ++ select ZLIB_INFLATE ++ help ++ This is a Linux-specific extension to RockRidge which lets you store ++ data in compressed form on a CD-ROM and have it transparently ++ decompressed when the CD-ROM is accessed. See ++ <http://www.kernel.org/pub/linux/utils/fs/zisofs/> for the tools ++ necessary to create such a filesystem. Say Y here if you want to be ++ able to read such compressed CD-ROMs. ++ ++config UDF_FS ++ tristate "UDF file system support" ++ select CRC_ITU_T ++ help ++ This is the new file system used on some CD-ROMs and DVDs. Say Y if ++ you intend to mount DVD discs or CDRW's written in packet mode, or ++ if written to by other UDF utilities, such as DirectCD. ++ Please read <file:Documentation/filesystems/udf.txt>. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called udf. ++ ++ If unsure, say N. ++ ++config UDF_NLS ++ bool ++ default y ++ depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) ++ ++endmenu ++endif # BLOCK ++ ++if BLOCK ++menu "DOS/FAT/NT Filesystems" ++ ++config FAT_FS ++ tristate ++ select NLS ++ help ++ If you want to use one of the FAT-based file systems (the MS-DOS and ++ VFAT (Windows 95) file systems), then you must say Y or M here ++ to include FAT support. You will then be able to mount partitions or ++ diskettes with FAT-based file systems and transparently access the ++ files on them, i.e. MSDOS files will look and behave just like all ++ other Unix files. ++ ++ This FAT support is not a file system in itself, it only provides ++ the foundation for the other file systems. You will have to say Y or ++ M to at least one of "MSDOS fs support" or "VFAT fs support" in ++ order to make use of it. ++ ++ Another way to read and write MSDOS floppies and hard drive ++ partitions from within Linux (but not transparently) is with the ++ mtools ("man mtools") program suite. You don't need to say Y here in ++ order to do that. ++ ++ If you need to move large files on floppies between a DOS and a ++ Linux box, say Y here, mount the floppy under Linux with an MSDOS ++ file system and use GNU tar's M option. GNU tar is a program ++ available for Unix and DOS ("man tar" or "info tar"). ++ ++ The FAT support will enlarge your kernel by about 37 KB. If unsure, ++ say Y. ++ ++ To compile this as a module, choose M here: the module will be called ++ fat. Note that if you compile the FAT support as a module, you ++ cannot compile any of the FAT-based file systems into the kernel ++ -- they will have to be modules as well. ++ ++config MSDOS_FS ++ tristate "MSDOS fs support" ++ select FAT_FS ++ help ++ This allows you to mount MSDOS partitions of your hard drive (unless ++ they are compressed; to access compressed MSDOS partitions under ++ Linux, you can either use the DOS emulator DOSEMU, described in the ++ DOSEMU-HOWTO, available from ++ <http://www.tldp.org/docs.html#howto>, or try dmsdosfs in ++ <ftp://ibiblio.org/pub/Linux/system/filesystems/dosfs/>. If you ++ intend to use dosemu with a non-compressed MSDOS partition, say Y ++ here) and MSDOS floppies. This means that file access becomes ++ transparent, i.e. the MSDOS files look and behave just like all ++ other Unix files. ++ ++ If you have Windows 95 or Windows NT installed on your MSDOS ++ partitions, you should use the VFAT file system (say Y to "VFAT fs ++ support" below), or you will not be able to see the long filenames ++ generated by Windows 95 / Windows NT. ++ ++ This option will enlarge your kernel by about 7 KB. If unsure, ++ answer Y. This will only work if you said Y to "DOS FAT fs support" ++ as well. To compile this as a module, choose M here: the module will ++ be called msdos. ++ ++config VFAT_FS ++ tristate "VFAT (Windows-95) fs support" ++ select FAT_FS ++ help ++ This option provides support for normal Windows file systems with ++ long filenames. That includes non-compressed FAT-based file systems ++ used by Windows 95, Windows 98, Windows NT 4.0, and the Unix ++ programs from the mtools package. ++ ++ The VFAT support enlarges your kernel by about 10 KB and it only ++ works if you said Y to the "DOS FAT fs support" above. Please read ++ the file <file:Documentation/filesystems/vfat.txt> for details. If ++ unsure, say Y. ++ ++ To compile this as a module, choose M here: the module will be called ++ vfat. ++ ++config FAT_DEFAULT_CODEPAGE ++ int "Default codepage for FAT" ++ depends on MSDOS_FS || VFAT_FS ++ default 437 ++ help ++ This option should be set to the codepage of your FAT filesystems. ++ It can be overridden with the "codepage" mount option. ++ See <file:Documentation/filesystems/vfat.txt> for more information. ++ ++config FAT_DEFAULT_IOCHARSET ++ string "Default iocharset for FAT" ++ depends on VFAT_FS ++ default "iso8859-1" ++ help ++ Set this to the default input/output character set you'd ++ like FAT to use. It should probably match the character set ++ that most of your FAT filesystems use, and can be overridden ++ with the "iocharset" mount option for FAT filesystems. ++ Note that "utf8" is not recommended for FAT filesystems. ++ If unsure, you shouldn't set "utf8" here. ++ See <file:Documentation/filesystems/vfat.txt> for more information. ++ ++config NTFS_FS ++ tristate "NTFS file system support" ++ select NLS ++ help ++ NTFS is the file system of Microsoft Windows NT, 2000, XP and 2003. ++ ++ Saying Y or M here enables read support. There is partial, but ++ safe, write support available. For write support you must also ++ say Y to "NTFS write support" below. ++ ++ There are also a number of user-space tools available, called ++ ntfsprogs. These include ntfsundelete and ntfsresize, that work ++ without NTFS support enabled in the kernel. ++ ++ This is a rewrite from scratch of Linux NTFS support and replaced ++ the old NTFS code starting with Linux 2.5.11. A backport to ++ the Linux 2.4 kernel series is separately available as a patch ++ from the project web site. ++ ++ For more information see <file:Documentation/filesystems/ntfs.txt> ++ and <http://www.linux-ntfs.org/>. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ntfs. ++ ++ If you are not using Windows NT, 2000, XP or 2003 in addition to ++ Linux on your computer it is safe to say N. ++ ++config NTFS_DEBUG ++ bool "NTFS debugging support" ++ depends on NTFS_FS ++ help ++ If you are experiencing any problems with the NTFS file system, say ++ Y here. This will result in additional consistency checks to be ++ performed by the driver as well as additional debugging messages to ++ be written to the system log. Note that debugging messages are ++ disabled by default. To enable them, supply the option debug_msgs=1 ++ at the kernel command line when booting the kernel or as an option ++ to insmod when loading the ntfs module. Once the driver is active, ++ you can enable debugging messages by doing (as root): ++ echo 1 > /proc/sys/fs/ntfs-debug ++ Replacing the "1" with "0" would disable debug messages. ++ ++ If you leave debugging messages disabled, this results in little ++ overhead, but enabling debug messages results in very significant ++ slowdown of the system. ++ ++ When reporting bugs, please try to have available a full dump of ++ debugging messages while the misbehaviour was occurring. ++ ++config NTFS_RW ++ bool "NTFS write support" ++ depends on NTFS_FS ++ help ++ This enables the partial, but safe, write support in the NTFS driver. ++ ++ The only supported operation is overwriting existing files, without ++ changing the file length. No file or directory creation, deletion or ++ renaming is possible. Note only non-resident files can be written to ++ so you may find that some very small files (<500 bytes or so) cannot ++ be written to. ++ ++ While we cannot guarantee that it will not damage any data, we have ++ so far not received a single report where the driver would have ++ damaged someones data so we assume it is perfectly safe to use. ++ ++ Note: While write support is safe in this version (a rewrite from ++ scratch of the NTFS support), it should be noted that the old NTFS ++ write support, included in Linux 2.5.10 and before (since 1997), ++ is not safe. ++ ++ This is currently useful with TopologiLinux. TopologiLinux is run ++ on top of any DOS/Microsoft Windows system without partitioning your ++ hard disk. Unlike other Linux distributions TopologiLinux does not ++ need its own partition. For more information see ++ <http://topologi-linux.sourceforge.net/> ++ ++ It is perfectly safe to say N here. ++ ++endmenu ++endif # BLOCK ++ ++menu "Pseudo filesystems" ++ ++source "fs/proc/Kconfig" ++ ++config SYSFS ++ bool "sysfs file system support" if EMBEDDED ++ default y ++ help ++ The sysfs filesystem is a virtual filesystem that the kernel uses to ++ export internal kernel objects, their attributes, and their ++ relationships to one another. ++ ++ Users can use sysfs to ascertain useful information about the running ++ kernel, such as the devices the kernel has discovered on each bus and ++ which driver each is bound to. sysfs can also be used to tune devices ++ and other kernel subsystems. ++ ++ Some system agents rely on the information in sysfs to operate. ++ /sbin/hotplug uses device and object attributes in sysfs to assist in ++ delegating policy decisions, like persistently naming devices. ++ ++ sysfs is currently used by the block subsystem to mount the root ++ partition. If sysfs is disabled you must specify the boot device on ++ the kernel boot command line via its major and minor numbers. For ++ example, "root=03:01" for /dev/hda1. ++ ++ Designers of embedded systems may wish to say N here to conserve space. ++ ++config TMPFS ++ bool "Virtual memory file system support (former shm fs)" ++ help ++ Tmpfs is a file system which keeps all files in virtual memory. ++ ++ Everything in tmpfs is temporary in the sense that no files will be ++ created on your hard drive. The files live in memory and swap ++ space. If you unmount a tmpfs instance, everything stored therein is ++ lost. ++ ++ See <file:Documentation/filesystems/tmpfs.txt> for details. ++ ++config TMPFS_POSIX_ACL ++ bool "Tmpfs POSIX Access Control Lists" ++ depends on TMPFS ++ select GENERIC_ACL ++ help ++ POSIX Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the POSIX ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N. ++ ++config HUGETLBFS ++ bool "HugeTLB file system support" ++ depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ ++ (S390 && 64BIT) || BROKEN ++ help ++ hugetlbfs is a filesystem backing for HugeTLB pages, based on ++ ramfs. For architectures that support it, say Y here and read ++ <file:Documentation/vm/hugetlbpage.txt> for details. ++ ++ If unsure, say N. ++ ++config HUGETLB_PAGE ++ def_bool HUGETLBFS ++ ++config CONFIGFS_FS ++ tristate "Userspace-driven configuration filesystem" ++ depends on SYSFS ++ help ++ configfs is a ram-based filesystem that provides the converse ++ of sysfs's functionality. Where sysfs is a filesystem-based ++ view of kernel objects, configfs is a filesystem-based manager ++ of kernel objects, or config_items. ++ ++ Both sysfs and configfs can and should exist together on the ++ same system. One is not a replacement for the other. ++ ++endmenu ++ ++menu "Miscellaneous filesystems" ++ ++config ADFS_FS ++ tristate "ADFS file system support (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ help ++ The Acorn Disc Filing System is the standard file system of the ++ RiscOS operating system which runs on Acorn's ARM-based Risc PC ++ systems and the Acorn Archimedes range of machines. If you say Y ++ here, Linux will be able to read from ADFS partitions on hard drives ++ and from ADFS-formatted floppy discs. If you also want to be able to ++ write to those devices, say Y to "ADFS write support" below. ++ ++ The ADFS partition should be the first partition (i.e., ++ /dev/[hs]d?1) on each of your drives. Please read the file ++ <file:Documentation/filesystems/adfs.txt> for further details. ++ ++ To compile this code as a module, choose M here: the module will be ++ called adfs. ++ ++ If unsure, say N. ++ ++config ADFS_FS_RW ++ bool "ADFS write support (DANGEROUS)" ++ depends on ADFS_FS ++ help ++ If you say Y here, you will be able to write to ADFS partitions on ++ hard drives and ADFS-formatted floppy disks. This is experimental ++ codes, so if you're unsure, say N. ++ ++config AFFS_FS ++ tristate "Amiga FFS file system support (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ help ++ The Fast File System (FFS) is the common file system used on hard ++ disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y ++ if you want to be able to read and write files from and to an Amiga ++ FFS partition on your hard drive. Amiga floppies however cannot be ++ read with this driver due to an incompatibility of the floppy ++ controller used in an Amiga and the standard floppy controller in ++ PCs and workstations. Read <file:Documentation/filesystems/affs.txt> ++ and <file:fs/affs/Changes>. ++ ++ With this driver you can also mount disk files used by Bernd ++ Schmidt's Un*X Amiga Emulator ++ (<http://www.freiburg.linux.de/~uae/>). ++ If you want to do this, you will also need to say Y or M to "Loop ++ device support", above. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called affs. If unsure, say N. ++ ++config ECRYPT_FS ++ tristate "eCrypt filesystem layer support (EXPERIMENTAL)" ++ depends on EXPERIMENTAL && KEYS && CRYPTO && NET ++ help ++ Encrypted filesystem that operates on the VFS layer. See ++ <file:Documentation/filesystems/ecryptfs.txt> to learn more about ++ eCryptfs. Userspace components are required and can be ++ obtained from <http://ecryptfs.sf.net>. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ecryptfs. ++ ++config HFS_FS ++ tristate "Apple Macintosh file system support (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ select NLS ++ help ++ If you say Y here, you will be able to mount Macintosh-formatted ++ floppy disks and hard drive partitions with full read-write access. ++ Please read <file:Documentation/filesystems/hfs.txt> to learn about ++ the available mount options. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called hfs. ++ ++config HFSPLUS_FS ++ tristate "Apple Extended HFS file system support" ++ depends on BLOCK ++ select NLS ++ select NLS_UTF8 ++ help ++ If you say Y here, you will be able to mount extended format ++ Macintosh-formatted hard drive partitions with full read-write access. ++ ++ This file system is often called HFS+ and was introduced with ++ MacOS 8. It includes all Mac specific filesystem data such as ++ data forks and creator codes, but it also has several UNIX ++ style features such as file ownership and permissions. ++ ++config BEFS_FS ++ tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ select NLS ++ help ++ The BeOS File System (BeFS) is the native file system of Be, Inc's ++ BeOS. Notable features include support for arbitrary attributes ++ on files and directories, and database-like indices on selected ++ attributes. (Also note that this driver doesn't make those features ++ available at this time). It is a 64 bit filesystem, so it supports ++ extremely large volumes and files. ++ ++ If you use this filesystem, you should also say Y to at least one ++ of the NLS (native language support) options below. ++ ++ If you don't know what this is about, say N. ++ ++ To compile this as a module, choose M here: the module will be ++ called befs. ++ ++config BEFS_DEBUG ++ bool "Debug BeFS" ++ depends on BEFS_FS ++ help ++ If you say Y here, you can use the 'debug' mount option to enable ++ debugging output from the driver. ++ ++config BFS_FS ++ tristate "BFS file system support (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ help ++ Boot File System (BFS) is a file system used under SCO UnixWare to ++ allow the bootloader access to the kernel image and other important ++ files during the boot process. It is usually mounted under /stand ++ and corresponds to the slice marked as "STAND" in the UnixWare ++ partition. You should say Y if you want to read or write the files ++ on your /stand slice from within Linux. You then also need to say Y ++ to "UnixWare slices support", below. More information about the BFS ++ file system is contained in the file ++ <file:Documentation/filesystems/bfs.txt>. ++ ++ If you don't know what this is about, say N. ++ ++ To compile this as a module, choose M here: the module will be called ++ bfs. Note that the file system of your root partition (the one ++ containing the directory /) cannot be compiled as a module. ++ ++ ++ ++config EFS_FS ++ tristate "EFS file system support (read only) (EXPERIMENTAL)" ++ depends on BLOCK && EXPERIMENTAL ++ help ++ EFS is an older file system used for non-ISO9660 CD-ROMs and hard ++ disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer ++ uses the XFS file system for hard disk partitions however). ++ ++ This implementation only offers read-only access. If you don't know ++ what all this is about, it's safe to say N. For more information ++ about EFS see its home page at <http://aeschi.ch.eu.org/efs/>. ++ ++ To compile the EFS file system support as a module, choose M here: the ++ module will be called efs. ++ ++config JFFS2_FS ++ tristate "Journalling Flash File System v2 (JFFS2) support" ++ select CRC32 ++ depends on MTD ++ help ++ JFFS2 is the second generation of the Journalling Flash File System ++ for use on diskless embedded devices. It provides improved wear ++ levelling, compression and support for hard links. You cannot use ++ this on normal block devices, only on 'MTD' devices. ++ ++ Further information on the design and implementation of JFFS2 is ++ available at <http://sources.redhat.com/jffs2/>. ++ ++config JFFS2_FS_DEBUG ++ int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)" ++ depends on JFFS2_FS ++ default "0" ++ help ++ This controls the amount of debugging messages produced by the JFFS2 ++ code. Set it to zero for use in production systems. For evaluation, ++ testing and debugging, it's advisable to set it to one. This will ++ enable a few assertions and will print debugging messages at the ++ KERN_DEBUG loglevel, where they won't normally be visible. Level 2 ++ is unlikely to be useful - it enables extra debugging in certain ++ areas which at one point needed debugging, but when the bugs were ++ located and fixed, the detailed messages were relegated to level 2. ++ ++ If reporting bugs, please try to have available a full dump of the ++ messages at debug level 1 while the misbehaviour was occurring. ++ ++config JFFS2_FS_WRITEBUFFER ++ bool "JFFS2 write-buffering support" ++ depends on JFFS2_FS ++ default y ++ help ++ This enables the write-buffering support in JFFS2. ++ ++ This functionality is required to support JFFS2 on the following ++ types of flash devices: ++ - NAND flash ++ - NOR flash with transparent ECC ++ - DataFlash ++ ++config JFFS2_FS_WBUF_VERIFY ++ bool "Verify JFFS2 write-buffer reads" ++ depends on JFFS2_FS_WRITEBUFFER ++ default n ++ help ++ This causes JFFS2 to read back every page written through the ++ write-buffer, and check for errors. ++ ++config JFFS2_SUMMARY ++ bool "JFFS2 summary support (EXPERIMENTAL)" ++ depends on JFFS2_FS && EXPERIMENTAL ++ default n ++ help ++ This feature makes it possible to use summary information ++ for faster filesystem mount. ++ ++ The summary information can be inserted into a filesystem image ++ by the utility 'sumtool'. ++ ++ If unsure, say 'N'. ++ ++config JFFS2_FS_XATTR ++ bool "JFFS2 XATTR support (EXPERIMENTAL)" ++ depends on JFFS2_FS && EXPERIMENTAL ++ default n ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). ++ ++ If unsure, say N. ++ ++config JFFS2_FS_POSIX_ACL ++ bool "JFFS2 POSIX Access Control Lists" ++ depends on JFFS2_FS_XATTR ++ default y ++ select FS_POSIX_ACL ++ help ++ Posix Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ To learn more about Access Control Lists, visit the Posix ACLs for ++ Linux website <http://acl.bestbits.at/>. ++ ++ If you don't know what Access Control Lists are, say N ++ ++config JFFS2_FS_SECURITY ++ bool "JFFS2 Security Labels" ++ depends on JFFS2_FS_XATTR ++ default y ++ help ++ Security labels support alternative access control models ++ implemented by security modules like SELinux. This option ++ enables an extended attribute handler for file security ++ labels in the jffs2 filesystem. ++ ++ If you are not using a security module that requires using ++ extended attributes for file security labels, say N. ++ ++config JFFS2_COMPRESSION_OPTIONS ++ bool "Advanced compression options for JFFS2" ++ depends on JFFS2_FS ++ default n ++ help ++ Enabling this option allows you to explicitly choose which ++ compression modules, if any, are enabled in JFFS2. Removing ++ compressors can mean you cannot read existing file systems, ++ and enabling experimental compressors can mean that you ++ write a file system which cannot be read by a standard kernel. ++ ++ If unsure, you should _definitely_ say 'N'. ++ ++config JFFS2_ZLIB ++ bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS ++ select ZLIB_INFLATE ++ select ZLIB_DEFLATE ++ depends on JFFS2_FS ++ default y ++ help ++ Zlib is designed to be a free, general-purpose, legally unencumbered, ++ lossless data-compression library for use on virtually any computer ++ hardware and operating system. See <http://www.gzip.org/zlib/> for ++ further information. ++ ++ Say 'Y' if unsure. ++ ++config JFFS2_LZO ++ bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS ++ select LZO_COMPRESS ++ select LZO_DECOMPRESS ++ depends on JFFS2_FS ++ default n ++ help ++ minilzo-based compression. Generally works better than Zlib. ++ ++ This feature was added in July, 2007. Say 'N' if you need ++ compatibility with older bootloaders or kernels. ++ ++config JFFS2_RTIME ++ bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS ++ depends on JFFS2_FS ++ default y ++ help ++ Rtime does manage to recompress already-compressed data. Say 'Y' if unsure. ++ ++config JFFS2_RUBIN ++ bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS ++ depends on JFFS2_FS ++ default n ++ help ++ RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. ++ ++choice ++ prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS ++ default JFFS2_CMODE_PRIORITY ++ depends on JFFS2_FS ++ help ++ You can set here the default compression mode of JFFS2 from ++ the available compression modes. Don't touch if unsure. ++ ++config JFFS2_CMODE_NONE ++ bool "no compression" ++ help ++ Uses no compression. ++ ++config JFFS2_CMODE_PRIORITY ++ bool "priority" ++ help ++ Tries the compressors in a predefined order and chooses the first ++ successful one. ++ ++config JFFS2_CMODE_SIZE ++ bool "size (EXPERIMENTAL)" ++ help ++ Tries all compressors and chooses the one which has the smallest ++ result. ++ ++config JFFS2_CMODE_FAVOURLZO ++ bool "Favour LZO" ++ help ++ Tries all compressors and chooses the one which has the smallest ++ result but gives some preference to LZO (which has faster ++ decompression) at the expense of size. ++ ++endchoice ++ ++# UBIFS File system configuration ++source "fs/ubifs/Kconfig" ++ ++config CRAMFS ++ tristate "Compressed ROM file system support (cramfs)" ++ depends on BLOCK ++ select ZLIB_INFLATE ++ help ++ Saying Y here includes support for CramFs (Compressed ROM File ++ System). CramFs is designed to be a simple, small, and compressed ++ file system for ROM based embedded systems. CramFs is read-only, ++ limited to 256MB file systems (with 16MB files), and doesn't support ++ 16/32 bits uid/gid, hard links and timestamps. ++ ++ See <file:Documentation/filesystems/cramfs.txt> and ++ <file:fs/cramfs/README> for further information. ++ ++ To compile this as a module, choose M here: the module will be called ++ cramfs. Note that the root file system (the one containing the ++ directory /) cannot be compiled as a module. ++ ++ If unsure, say N. ++ ++config VXFS_FS ++ tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" ++ depends on BLOCK ++ help ++ FreeVxFS is a file system driver that support the VERITAS VxFS(TM) ++ file system format. VERITAS VxFS(TM) is the standard file system ++ of SCO UnixWare (and possibly others) and optionally available ++ for Sunsoft Solaris, HP-UX and many other operating systems. ++ Currently only readonly access is supported. ++ ++ NOTE: the file system type as used by mount(1), mount(2) and ++ fstab(5) is 'vxfs' as it describes the file system format, not ++ the actual driver. ++ ++ To compile this as a module, choose M here: the module will be ++ called freevxfs. If unsure, say N. ++ ++config MINIX_FS ++ tristate "Minix file system support" ++ depends on BLOCK ++ help ++ Minix is a simple operating system used in many classes about OS's. ++ The minix file system (method to organize files on a hard disk ++ partition or a floppy disk) was the original file system for Linux, ++ but has been superseded by the second extended file system ext2fs. ++ You don't want to use the minix file system on your hard disk ++ because of certain built-in restrictions, but it is sometimes found ++ on older Linux floppy disks. This option will enlarge your kernel ++ by about 28 KB. If unsure, say N. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called minix. Note that the file system of your root ++ partition (the one containing the directory /) cannot be compiled as ++ a module. ++ ++config OMFS_FS ++ tristate "SonicBlue Optimized MPEG File System support" ++ depends on BLOCK ++ select CRC_ITU_T ++ help ++ This is the proprietary file system used by the Rio Karma music ++ player and ReplayTV DVR. Despite the name, this filesystem is not ++ more efficient than a standard FS for MPEG files, in fact likely ++ the opposite is true. Say Y if you have either of these devices ++ and wish to mount its disk. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called omfs. If unsure, say N. ++ ++config HPFS_FS ++ tristate "OS/2 HPFS file system support" ++ depends on BLOCK ++ help ++ OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS ++ is the file system used for organizing files on OS/2 hard disk ++ partitions. Say Y if you want to be able to read files from and ++ write files to an OS/2 HPFS partition on your hard drive. OS/2 ++ floppies however are in regular MSDOS format, so you don't need this ++ option in order to be able to read them. Read ++ <file:Documentation/filesystems/hpfs.txt>. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called hpfs. If unsure, say N. ++ ++ ++config QNX4FS_FS ++ tristate "QNX4 file system support (read only)" ++ depends on BLOCK ++ help ++ This is the file system used by the real-time operating systems ++ QNX 4 and QNX 6 (the latter is also called QNX RTP). ++ Further information is available at <http://www.qnx.com/>. ++ Say Y if you intend to mount QNX hard disks or floppies. ++ Unless you say Y to "QNX4FS read-write support" below, you will ++ only be able to read these file systems. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called qnx4. ++ ++ If you don't know whether you need it, then you don't need it: ++ answer N. ++ ++config QNX4FS_RW ++ bool "QNX4FS write support (DANGEROUS)" ++ depends on QNX4FS_FS && EXPERIMENTAL && BROKEN ++ help ++ Say Y if you want to test write support for QNX4 file systems. ++ ++ It's currently broken, so for now: ++ answer N. ++ ++config ROMFS_FS ++ tristate "ROM file system support" ++ depends on BLOCK ++ ---help--- ++ This is a very small read-only file system mainly intended for ++ initial ram disks of installation disks, but it could be used for ++ other read-only media as well. Read ++ <file:Documentation/filesystems/romfs.txt> for details. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called romfs. Note that the file system of your ++ root partition (the one containing the directory /) cannot be a ++ module. ++ ++ If you don't know whether you need it, then you don't need it: ++ answer N. ++ ++ ++config SYSV_FS ++ tristate "System V/Xenix/V7/Coherent file system support" ++ depends on BLOCK ++ help ++ SCO, Xenix and Coherent are commercial Unix systems for Intel ++ machines, and Version 7 was used on the DEC PDP-11. Saying Y ++ here would allow you to read from their floppies and hard disk ++ partitions. ++ ++ If you have floppies or hard disk partitions like that, it is likely ++ that they contain binaries from those other Unix systems; in order ++ to run these binaries, you will want to install linux-abi which is ++ a set of kernel modules that lets you run SCO, Xenix, Wyse, ++ UnixWare, Dell Unix and System V programs under Linux. It is ++ available via FTP (user: ftp) from ++ <ftp://ftp.openlinux.org/pub/people/hch/linux-abi/>). ++ NOTE: that will work only for binaries from Intel-based systems; ++ PDP ones will have to wait until somebody ports Linux to -11 ;-) ++ ++ If you only intend to mount files from some other Unix over the ++ network using NFS, you don't need the System V file system support ++ (but you need NFS file system support obviously). ++ ++ Note that this option is generally not needed for floppies, since a ++ good portable way to transport files and directories between unixes ++ (and even other operating systems) is given by the tar program ("man ++ tar" or preferably "info tar"). Note also that this option has ++ nothing whatsoever to do with the option "System V IPC". Read about ++ the System V file system in ++ <file:Documentation/filesystems/sysv-fs.txt>. ++ Saying Y here will enlarge your kernel by about 27 KB. ++ ++ To compile this as a module, choose M here: the module will be called ++ sysv. ++ ++ If you haven't heard about all of this before, it's safe to say N. ++ ++ ++config UFS_FS ++ tristate "UFS file system support (read only)" ++ depends on BLOCK ++ help ++ BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, ++ OpenBSD and NeXTstep) use a file system called UFS. Some System V ++ Unixes can create and mount hard disk partitions and diskettes using ++ this file system as well. Saying Y here will allow you to read from ++ these partitions; if you also want to write to them, say Y to the ++ experimental "UFS file system write support", below. Please read the ++ file <file:Documentation/filesystems/ufs.txt> for more information. ++ ++ The recently released UFS2 variant (used in FreeBSD 5.x) is ++ READ-ONLY supported. ++ ++ Note that this option is generally not needed for floppies, since a ++ good portable way to transport files and directories between unixes ++ (and even other operating systems) is given by the tar program ("man ++ tar" or preferably "info tar"). ++ ++ When accessing NeXTstep files, you may need to convert them from the ++ NeXT character set to the Latin1 character set; use the program ++ recode ("info recode") for this purpose. ++ ++ To compile the UFS file system support as a module, choose M here: the ++ module will be called ufs. ++ ++ If you haven't heard about all of this before, it's safe to say N. ++ ++config UFS_FS_WRITE ++ bool "UFS file system write support (DANGEROUS)" ++ depends on UFS_FS && EXPERIMENTAL ++ help ++ Say Y here if you want to try writing to UFS partitions. This is ++ experimental, so you should back up your UFS partitions beforehand. ++ ++config UFS_DEBUG ++ bool "UFS debugging" ++ depends on UFS_FS ++ help ++ If you are experiencing any problems with the UFS filesystem, say ++ Y here. This will result in _many_ additional debugging messages to be ++ written to the system log. ++ ++endmenu ++ ++menuconfig NETWORK_FILESYSTEMS ++ bool "Network File Systems" ++ default y ++ depends on NET ++ ---help--- ++ Say Y here to get to see options for network filesystems and ++ filesystem-related networking code, such as NFS daemon and ++ RPCSEC security modules. ++ ++ This option alone does not add any kernel code. ++ ++ If you say N, all options in this submenu will be skipped and ++ disabled; if unsure, say Y here. ++ ++if NETWORK_FILESYSTEMS ++ ++config NFS_FS ++ tristate "NFS client support" ++ depends on INET ++ select LOCKD ++ select SUNRPC ++ select NFS_ACL_SUPPORT if NFS_V3_ACL ++ help ++ Choose Y here if you want to access files residing on other ++ computers using Sun's Network File System protocol. To compile ++ this file system support as a module, choose M here: the module ++ will be called nfs. ++ ++ To mount file systems exported by NFS servers, you also need to ++ install the user space mount.nfs command which can be found in ++ the Linux nfs-utils package, available from http://linux-nfs.org/. ++ Information about using the mount command is available in the ++ mount(8) man page. More detail about the Linux NFS client ++ implementation is available via the nfs(5) man page. ++ ++ Below you can choose which versions of the NFS protocol are ++ available in the kernel to mount NFS servers. Support for NFS ++ version 2 (RFC 1094) is always available when NFS_FS is selected. ++ ++ To configure a system which mounts its root file system via NFS ++ at boot time, say Y here, select "Kernel level IP ++ autoconfiguration" in the NETWORK menu, and select "Root file ++ system on NFS" below. You cannot compile this file system as a ++ module in this case. ++ ++ If unsure, say N. ++ ++config NFS_V3 ++ bool "NFS client support for NFS version 3" ++ depends on NFS_FS ++ help ++ This option enables support for version 3 of the NFS protocol ++ (RFC 1813) in the kernel's NFS client. ++ ++ If unsure, say Y. ++ ++config NFS_V3_ACL ++ bool "NFS client support for the NFSv3 ACL protocol extension" ++ depends on NFS_V3 ++ help ++ Some NFS servers support an auxiliary NFSv3 ACL protocol that ++ Sun added to Solaris but never became an official part of the ++ NFS version 3 protocol. This protocol extension allows ++ applications on NFS clients to manipulate POSIX Access Control ++ Lists on files residing on NFS servers. NFS servers enforce ++ ACLs on local files whether this protocol is available or not. ++ ++ Choose Y here if your NFS server supports the Solaris NFSv3 ACL ++ protocol extension and you want your NFS client to allow ++ applications to access and modify ACLs on files on the server. ++ ++ Most NFS servers don't support the Solaris NFSv3 ACL protocol ++ extension. You can choose N here or specify the "noacl" mount ++ option to prevent your NFS client from trying to use the NFSv3 ++ ACL protocol. ++ ++ If unsure, say N. ++ ++config NFS_V4 ++ bool "NFS client support for NFS version 4 (EXPERIMENTAL)" ++ depends on NFS_FS && EXPERIMENTAL ++ select RPCSEC_GSS_KRB5 ++ help ++ This option enables support for version 4 of the NFS protocol ++ (RFC 3530) in the kernel's NFS client. ++ ++ To mount NFS servers using NFSv4, you also need to install user ++ space programs which can be found in the Linux nfs-utils package, ++ available from http://linux-nfs.org/. ++ ++ If unsure, say N. ++ ++config ROOT_NFS ++ bool "Root file system on NFS" ++ depends on NFS_FS=y && IP_PNP ++ help ++ If you want your system to mount its root file system via NFS, ++ choose Y here. This is common practice for managing systems ++ without local permanent storage. For details, read ++ <file:Documentation/filesystems/nfsroot.txt>. ++ ++ Most people say N here. ++ ++config NFSD ++ tristate "NFS server support" ++ depends on INET ++ select LOCKD ++ select SUNRPC ++ select EXPORTFS ++ select NFS_ACL_SUPPORT if NFSD_V2_ACL ++ help ++ Choose Y here if you want to allow other computers to access ++ files residing on this system using Sun's Network File System ++ protocol. To compile the NFS server support as a module, ++ choose M here: the module will be called nfsd. ++ ++ You may choose to use a user-space NFS server instead, in which ++ case you can choose N here. ++ ++ To export local file systems using NFS, you also need to install ++ user space programs which can be found in the Linux nfs-utils ++ package, available from http://linux-nfs.org/. More detail about ++ the Linux NFS server implementation is available via the ++ exports(5) man page. ++ ++ Below you can choose which versions of the NFS protocol are ++ available to clients mounting the NFS server on this system. ++ Support for NFS version 2 (RFC 1094) is always available when ++ CONFIG_NFSD is selected. ++ ++ If unsure, say N. ++ ++config NFSD_V2_ACL ++ bool ++ depends on NFSD ++ ++config NFSD_V3 ++ bool "NFS server support for NFS version 3" ++ depends on NFSD ++ help ++ This option enables support in your system's NFS server for ++ version 3 of the NFS protocol (RFC 1813). ++ ++ If unsure, say Y. ++ ++config NFSD_V3_ACL ++ bool "NFS server support for the NFSv3 ACL protocol extension" ++ depends on NFSD_V3 ++ select NFSD_V2_ACL ++ help ++ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that ++ never became an official part of the NFS version 3 protocol. ++ This protocol extension allows applications on NFS clients to ++ manipulate POSIX Access Control Lists on files residing on NFS ++ servers. NFS servers enforce POSIX ACLs on local files whether ++ this protocol is available or not. ++ ++ This option enables support in your system's NFS server for the ++ NFSv3 ACL protocol extension allowing NFS clients to manipulate ++ POSIX ACLs on files exported by your system's NFS server. NFS ++ clients which support the Solaris NFSv3 ACL protocol can then ++ access and modify ACLs on your NFS server. ++ ++ To store ACLs on your NFS server, you also need to enable ACL- ++ related CONFIG options for your local file systems of choice. ++ ++ If unsure, say N. ++ ++config NFSD_V4 ++ bool "NFS server support for NFS version 4 (EXPERIMENTAL)" ++ depends on NFSD && PROC_FS && EXPERIMENTAL ++ select NFSD_V3 ++ select FS_POSIX_ACL ++ select RPCSEC_GSS_KRB5 ++ help ++ This option enables support in your system's NFS server for ++ version 4 of the NFS protocol (RFC 3530). ++ ++ To export files using NFSv4, you need to install additional user ++ space programs which can be found in the Linux nfs-utils package, ++ available from http://linux-nfs.org/. ++ ++ If unsure, say N. ++ ++config LOCKD ++ tristate ++ ++config LOCKD_V4 ++ bool ++ depends on NFSD_V3 || NFS_V3 ++ default y ++ ++config EXPORTFS ++ tristate ++ ++config NFS_ACL_SUPPORT ++ tristate ++ select FS_POSIX_ACL ++ ++config NFS_COMMON ++ bool ++ depends on NFSD || NFS_FS ++ default y ++ ++config SUNRPC ++ tristate ++ ++config SUNRPC_GSS ++ tristate ++ ++config SUNRPC_XPRT_RDMA ++ tristate ++ depends on SUNRPC && INFINIBAND && EXPERIMENTAL ++ default SUNRPC && INFINIBAND ++ help ++ This option enables an RPC client transport capability that ++ allows the NFS client to mount servers via an RDMA-enabled ++ transport. ++ ++ To compile RPC client RDMA transport support as a module, ++ choose M here: the module will be called xprtrdma. ++ ++ If unsure, say N. ++ ++config RPCSEC_GSS_KRB5 ++ tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" ++ depends on SUNRPC && EXPERIMENTAL ++ select SUNRPC_GSS ++ select CRYPTO ++ select CRYPTO_MD5 ++ select CRYPTO_DES ++ select CRYPTO_CBC ++ help ++ Choose Y here to enable Secure RPC using the Kerberos version 5 ++ GSS-API mechanism (RFC 1964). ++ ++ Secure RPC calls with Kerberos require an auxiliary user-space ++ daemon which may be found in the Linux nfs-utils package ++ available from http://linux-nfs.org/. In addition, user-space ++ Kerberos support should be installed. ++ ++ If unsure, say N. ++ ++config RPCSEC_GSS_SPKM3 ++ tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" ++ depends on SUNRPC && EXPERIMENTAL ++ select SUNRPC_GSS ++ select CRYPTO ++ select CRYPTO_MD5 ++ select CRYPTO_DES ++ select CRYPTO_CAST5 ++ select CRYPTO_CBC ++ help ++ Choose Y here to enable Secure RPC using the SPKM3 public key ++ GSS-API mechansim (RFC 2025). ++ ++ Secure RPC calls with SPKM3 require an auxiliary userspace ++ daemon which may be found in the Linux nfs-utils package ++ available from http://linux-nfs.org/. ++ ++ If unsure, say N. ++ ++config SMB_FS ++ tristate "SMB file system support (OBSOLETE, please use CIFS)" ++ depends on INET ++ select NLS ++ help ++ SMB (Server Message Block) is the protocol Windows for Workgroups ++ (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share ++ files and printers over local networks. Saying Y here allows you to ++ mount their file systems (often called "shares" in this context) and ++ access them just like any other Unix directory. Currently, this ++ works only if the Windows machines use TCP/IP as the underlying ++ transport protocol, and not NetBEUI. For details, read ++ <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO, ++ available from <http://www.tldp.org/docs.html#howto>. ++ ++ Note: if you just want your box to act as an SMB *server* and make ++ files and printing services available to Windows clients (which need ++ to have a TCP/IP stack), you don't need to say Y here; you can use ++ the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>) ++ for that. ++ ++ General information about how to connect Linux, Windows machines and ++ Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. ++ ++ To compile the SMB support as a module, choose M here: ++ the module will be called smbfs. Most people say N, however. ++ ++config SMB_NLS_DEFAULT ++ bool "Use a default NLS" ++ depends on SMB_FS ++ help ++ Enabling this will make smbfs use nls translations by default. You ++ need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls ++ settings and you need to give the default nls for the SMB server as ++ CONFIG_SMB_NLS_REMOTE. ++ ++ The nls settings can be changed at mount time, if your smbmount ++ supports that, using the codepage and iocharset parameters. ++ ++ smbmount from samba 2.2.0 or later supports this. ++ ++config SMB_NLS_REMOTE ++ string "Default Remote NLS Option" ++ depends on SMB_NLS_DEFAULT ++ default "cp437" ++ help ++ This setting allows you to specify a default value for which ++ codepage the server uses. If this field is left blank no ++ translations will be done by default. The local codepage/charset ++ default to CONFIG_NLS_DEFAULT. ++ ++ The nls settings can be changed at mount time, if your smbmount ++ supports that, using the codepage and iocharset parameters. ++ ++ smbmount from samba 2.2.0 or later supports this. ++ ++config CIFS ++ tristate "CIFS support (advanced network filesystem, SMBFS successor)" ++ depends on INET ++ select NLS ++ help ++ This is the client VFS module for the Common Internet File System ++ (CIFS) protocol which is the successor to the Server Message Block ++ (SMB) protocol, the native file sharing mechanism for most early ++ PC operating systems. The CIFS protocol is fully supported by ++ file servers such as Windows 2000 (including Windows 2003, NT 4 ++ and Windows XP) as well by Samba (which provides excellent CIFS ++ server support for Linux and many other operating systems). Limited ++ support for OS/2 and Windows ME and similar servers is provided as ++ well. ++ ++ The cifs module provides an advanced network file system ++ client for mounting to CIFS compliant servers. It includes ++ support for DFS (hierarchical name space), secure per-user ++ session establishment via Kerberos or NTLM or NTLMv2, ++ safe distributed caching (oplock), optional packet ++ signing, Unicode and other internationalization improvements. ++ If you need to mount to Samba or Windows from this machine, say Y. ++ ++config CIFS_STATS ++ bool "CIFS statistics" ++ depends on CIFS ++ help ++ Enabling this option will cause statistics for each server share ++ mounted by the cifs client to be displayed in /proc/fs/cifs/Stats ++ ++config CIFS_STATS2 ++ bool "Extended statistics" ++ depends on CIFS_STATS ++ help ++ Enabling this option will allow more detailed statistics on SMB ++ request timing to be displayed in /proc/fs/cifs/DebugData and also ++ allow optional logging of slow responses to dmesg (depending on the ++ value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details). ++ These additional statistics may have a minor effect on performance ++ and memory utilization. ++ ++ Unless you are a developer or are doing network performance analysis ++ or tuning, say N. ++ ++config CIFS_WEAK_PW_HASH ++ bool "Support legacy servers which use weaker LANMAN security" ++ depends on CIFS ++ help ++ Modern CIFS servers including Samba and most Windows versions ++ (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) ++ security mechanisms. These hash the password more securely ++ than the mechanisms used in the older LANMAN version of the ++ SMB protocol but LANMAN based authentication is needed to ++ establish sessions with some old SMB servers. ++ ++ Enabling this option allows the cifs module to mount to older ++ LANMAN based servers such as OS/2 and Windows 95, but such ++ mounts may be less secure than mounts using NTLM or more recent ++ security mechanisms if you are on a public network. Unless you ++ have a need to access old SMB servers (and are on a private ++ network) you probably want to say N. Even if this support ++ is enabled in the kernel build, LANMAN authentication will not be ++ used automatically. At runtime LANMAN mounts are disabled but ++ can be set to required (or optional) either in ++ /proc/fs/cifs (see fs/cifs/README for more detail) or via an ++ option on the mount command. This support is disabled by ++ default in order to reduce the possibility of a downgrade ++ attack. ++ ++ If unsure, say N. ++ ++config CIFS_XATTR ++ bool "CIFS extended attributes" ++ depends on CIFS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ <http://acl.bestbits.at/> for details). CIFS maps the name of ++ extended attributes beginning with the user namespace prefix ++ to SMB/CIFS EAs. EAs are stored on Windows servers without the ++ user namespace prefix, but their names are seen by Linux cifs clients ++ prefaced by the user namespace prefix. The system namespace ++ (used by some filesystems to store ACLs) is not supported at ++ this time. ++ ++ If unsure, say N. ++ ++config CIFS_POSIX ++ bool "CIFS POSIX Extensions" ++ depends on CIFS_XATTR ++ help ++ Enabling this option will cause the cifs client to attempt to ++ negotiate a newer dialect with servers, such as Samba 3.0.5 ++ or later, that optionally can handle more POSIX like (rather ++ than Windows like) file behavior. It also enables ++ support for POSIX ACLs (getfacl and setfacl) to servers ++ (such as Samba 3.10 and later) which can negotiate ++ CIFS POSIX ACL support. If unsure, say N. ++ ++config CIFS_DEBUG2 ++ bool "Enable additional CIFS debugging routines" ++ depends on CIFS ++ help ++ Enabling this option adds a few more debugging routines ++ to the cifs code which slightly increases the size of ++ the cifs module and can cause additional logging of debug ++ messages in some error paths, slowing performance. This ++ option can be turned off unless you are debugging ++ cifs problems. If unsure, say N. ++ ++config CIFS_EXPERIMENTAL ++ bool "CIFS Experimental Features (EXPERIMENTAL)" ++ depends on CIFS && EXPERIMENTAL ++ help ++ Enables cifs features under testing. These features are ++ experimental and currently include DFS support and directory ++ change notification ie fcntl(F_DNOTIFY), as well as the upcall ++ mechanism which will be used for Kerberos session negotiation ++ and uid remapping. Some of these features also may depend on ++ setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental ++ (which is disabled by default). See the file fs/cifs/README ++ for more details. If unsure, say N. ++ ++config CIFS_UPCALL ++ bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" ++ depends on CIFS_EXPERIMENTAL ++ depends on KEYS ++ help ++ Enables an upcall mechanism for CIFS which accesses ++ userspace helper utilities to provide SPNEGO packaged (RFC 4178) ++ Kerberos tickets which are needed to mount to certain secure servers ++ (for which more secure Kerberos authentication is required). If ++ unsure, say N. ++ ++config CIFS_DFS_UPCALL ++ bool "DFS feature support (EXPERIMENTAL)" ++ depends on CIFS_EXPERIMENTAL ++ depends on KEYS ++ help ++ Enables an upcall mechanism for CIFS which contacts userspace ++ helper utilities to provide server name resolution (host names to ++ IP addresses) which is needed for implicit mounts of DFS junction ++ points. If unsure, say N. ++ ++config NCP_FS ++ tristate "NCP file system support (to mount NetWare volumes)" ++ depends on IPX!=n || INET ++ help ++ NCP (NetWare Core Protocol) is a protocol that runs over IPX and is ++ used by Novell NetWare clients to talk to file servers. It is to ++ IPX what NFS is to TCP/IP, if that helps. Saying Y here allows you ++ to mount NetWare file server volumes and to access them just like ++ any other Unix directory. For details, please read the file ++ <file:Documentation/filesystems/ncpfs.txt> in the kernel source and ++ the IPX-HOWTO from <http://www.tldp.org/docs.html#howto>. ++ ++ You do not have to say Y here if you want your Linux box to act as a ++ file *server* for Novell NetWare clients. ++ ++ General information about how to connect Linux, Windows machines and ++ Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. ++ ++ To compile this as a module, choose M here: the module will be called ++ ncpfs. Say N unless you are connected to a Novell network. ++ ++source "fs/ncpfs/Kconfig" ++ ++config CODA_FS ++ tristate "Coda file system support (advanced network fs)" ++ depends on INET ++ help ++ Coda is an advanced network file system, similar to NFS in that it ++ enables you to mount file systems of a remote server and access them ++ with regular Unix commands as if they were sitting on your hard ++ disk. Coda has several advantages over NFS: support for ++ disconnected operation (e.g. for laptops), read/write server ++ replication, security model for authentication and encryption, ++ persistent client caches and write back caching. ++ ++ If you say Y here, your Linux box will be able to act as a Coda ++ *client*. You will need user level code as well, both for the ++ client and server. Servers are currently user level, i.e. they need ++ no kernel support. Please read ++ <file:Documentation/filesystems/coda.txt> and check out the Coda ++ home page <http://www.coda.cs.cmu.edu/>. ++ ++ To compile the coda client support as a module, choose M here: the ++ module will be called coda. ++ ++config AFS_FS ++ tristate "Andrew File System support (AFS) (EXPERIMENTAL)" ++ depends on INET && EXPERIMENTAL ++ select AF_RXRPC ++ help ++ If you say Y here, you will get an experimental Andrew File System ++ driver. It currently only supports unsecured read-only AFS access. ++ ++ See <file:Documentation/filesystems/afs.txt> for more information. ++ ++ If unsure, say N. ++ ++config AFS_DEBUG ++ bool "AFS dynamic debugging" ++ depends on AFS_FS ++ help ++ Say Y here to make runtime controllable debugging messages appear. ++ ++ See <file:Documentation/filesystems/afs.txt> for more information. ++ ++ If unsure, say N. ++ ++config 9P_FS ++ tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" ++ depends on INET && NET_9P && EXPERIMENTAL ++ help ++ If you say Y here, you will get experimental support for ++ Plan 9 resource sharing via the 9P2000 protocol. ++ ++ See <http://v9fs.sf.net> for more information. ++ ++ If unsure, say N. ++ ++endif # NETWORK_FILESYSTEMS ++ ++if BLOCK ++menu "Partition Types" ++ ++source "fs/partitions/Kconfig" ++ ++endmenu ++endif ++ ++source "fs/nls/Kconfig" ++source "fs/dlm/Kconfig" ++ ++endmenu +diff -uNr a/fs/Makefile b/fs/Makefile +--- a/fs/Makefile 2008-07-28 19:40:31.000000000 -0700 ++++ b/fs/Makefile 2008-08-13 16:18:09.000000000 -0700 +@@ -74,6 +74,7 @@ + obj-$(CONFIG_JBD2) += jbd2/ + obj-$(CONFIG_EXT2_FS) += ext2/ + obj-$(CONFIG_CRAMFS) += cramfs/ ++obj-$(CONFIG_SQUASHFS) += squashfs/ + obj-y += ramfs/ + obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ + obj-$(CONFIG_CODA_FS) += coda/ +diff -uNr a/fs/squashfs/block.c b/fs/squashfs/block.c +--- a/fs/squashfs/block.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/block.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,314 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * block.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++static struct buffer_head *get_block_length(struct super_block *s, ++ int *cur_index, int *offset, int *c_byte) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ unsigned short temp; ++ struct buffer_head *bh; ++ ++ if (!(bh = sb_bread(s, *cur_index))) ++ goto out; ++ ++ if (msblk->devblksize - *offset == 1) { ++ if (msblk->swap) ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ else ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ if (msblk->swap) ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ bh->b_data); ++ else ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ bh->b_data); ++ *c_byte = temp; ++ *offset = 1; ++ } else { ++ if (msblk->swap) { ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } else { ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } ++ *c_byte = temp; ++ *offset += 2; ++ } ++ ++ if (SQUASHFS_CHECK_DATA(msblk->sblk.flags)) { ++ if (*offset == msblk->devblksize) { ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ *offset = 0; ++ } ++ if (*((unsigned char *) (bh->b_data + *offset)) != ++ SQUASHFS_MARKER_BYTE) { ++ ERROR("Metadata block marker corrupt @ %x\n", ++ *cur_index); ++ brelse(bh); ++ goto out; ++ } ++ (*offset)++; ++ } ++ return bh; ++ ++out: ++ return NULL; ++} ++ ++ ++unsigned int squashfs_read_data(struct super_block *s, char *buffer, ++ long long index, unsigned int length, ++ long long *next_index, int srclength) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ struct buffer_head **bh; ++ unsigned int offset = index & ((1 << msblk->devblksize_log2) - 1); ++ unsigned int cur_index = index >> msblk->devblksize_log2; ++ int bytes, avail_bytes, b = 0, k = 0; ++ unsigned int compressed; ++ unsigned int c_byte = length; ++ ++ bh = kmalloc(((sblk->block_size >> msblk->devblksize_log2) + 1) * ++ sizeof(struct buffer_head *), GFP_KERNEL); ++ if (bh == NULL) ++ goto read_failure; ++ ++ if (c_byte) { ++ bytes = -offset; ++ compressed = SQUASHFS_COMPRESSED_BLOCK(c_byte); ++ c_byte = SQUASHFS_COMPRESSED_SIZE_BLOCK(c_byte); ++ ++ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index, ++ compressed ? "" : "un", (unsigned int) c_byte, srclength); ++ ++ if (c_byte > srclength || index < 0 || (index + c_byte) > sblk->bytes_used) ++ goto read_failure; ++ ++ for (b = 0; bytes < (int) c_byte; b++, cur_index++) { ++ bh[b] = sb_getblk(s, cur_index); ++ if (bh[b] == NULL) ++ goto block_release; ++ bytes += msblk->devblksize; ++ } ++ ll_rw_block(READ, b, bh); ++ } else { ++ if (index < 0 || (index + 2) > sblk->bytes_used) ++ goto read_failure; ++ ++ bh[0] = get_block_length(s, &cur_index, &offset, &c_byte); ++ if (bh[0] == NULL) ++ goto read_failure; ++ b = 1; ++ ++ bytes = msblk->devblksize - offset; ++ compressed = SQUASHFS_COMPRESSED(c_byte); ++ c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte); ++ ++ TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ++ ? "" : "un", (unsigned int) c_byte); ++ ++ if (c_byte > srclength || (index + c_byte) > sblk->bytes_used) ++ goto block_release; ++ ++ for (; bytes < c_byte; b++) { ++ bh[b] = sb_getblk(s, ++cur_index); ++ if (bh[b] == NULL) ++ goto block_release; ++ bytes += msblk->devblksize; ++ } ++ ll_rw_block(READ, b - 1, bh + 1); ++ } ++ ++ if (compressed) { ++ int zlib_err = 0; ++ ++ /* ++ * uncompress block ++ */ ++ ++ mutex_lock(&msblk->read_data_mutex); ++ ++ msblk->stream.next_out = buffer; ++ msblk->stream.avail_out = srclength; ++ ++ for (bytes = 0; k < b; k++) { ++ avail_bytes = min(c_byte - bytes, msblk->devblksize - offset); ++ ++ wait_on_buffer(bh[k]); ++ if (!buffer_uptodate(bh[k])) ++ goto release_mutex; ++ ++ msblk->stream.next_in = bh[k]->b_data + offset; ++ msblk->stream.avail_in = avail_bytes; ++ ++ if (k == 0) { ++ zlib_err = zlib_inflateInit(&msblk->stream); ++ if (zlib_err != Z_OK) { ++ ERROR("zlib_inflateInit returned unexpected result 0x%x," ++ " srclength %d\n", zlib_err, srclength); ++ goto release_mutex; ++ } ++ ++ if (avail_bytes == 0) { ++ offset = 0; ++ brelse(bh[k]); ++ continue; ++ } ++ } ++ ++ zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); ++ if (zlib_err != Z_OK && zlib_err != Z_STREAM_END) { ++ ERROR("zlib_inflate returned unexpected result 0x%x," ++ " srclength %d, avail_in %d, avail_out %d\n", zlib_err, ++ srclength, msblk->stream.avail_in, msblk->stream.avail_out); ++ goto release_mutex; ++ } ++ ++ bytes += avail_bytes; ++ offset = 0; ++ brelse(bh[k]); ++ } ++ ++ if (zlib_err != Z_STREAM_END) ++ goto release_mutex; ++ ++ zlib_err = zlib_inflateEnd(&msblk->stream); ++ if (zlib_err != Z_OK) { ++ ERROR("zlib_inflateEnd returned unexpected result 0x%x," ++ " srclength %d\n", zlib_err, srclength); ++ goto release_mutex; ++ } ++ bytes = msblk->stream.total_out; ++ mutex_unlock(&msblk->read_data_mutex); ++ } else { ++ int i; ++ ++ for(i = 0; i < b; i++) { ++ wait_on_buffer(bh[i]); ++ if (!buffer_uptodate(bh[i])) ++ goto block_release; ++ } ++ ++ for (bytes = 0; k < b; k++) { ++ avail_bytes = min(c_byte - bytes, msblk->devblksize - offset); ++ ++ memcpy(buffer + bytes, bh[k]->b_data + offset, avail_bytes); ++ bytes += avail_bytes; ++ offset = 0; ++ brelse(bh[k]); ++ } ++ } ++ ++ if (next_index) ++ *next_index = index + c_byte + (length ? 0 : ++ (SQUASHFS_CHECK_DATA(msblk->sblk.flags) ? 3 : 2)); ++ ++ kfree(bh); ++ return bytes; ++ ++release_mutex: ++ mutex_unlock(&msblk->read_data_mutex); ++ ++block_release: ++ for (; k < b; k++) ++ brelse(bh[k]); ++ ++read_failure: ++ ERROR("sb_bread failed reading block 0x%x\n", cur_index); ++ kfree(bh); ++ return 0; ++} ++ ++ ++int squashfs_get_cached_block(struct super_block *s, void *buffer, ++ long long block, unsigned int offset, ++ int length, long long *next_block, ++ unsigned int *next_offset) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ int bytes, return_length = length; ++ struct squashfs_cache_entry *entry; ++ ++ TRACE("Entered squashfs_get_cached_block [%llx:%x]\n", block, offset); ++ ++ while (1) { ++ entry = squashfs_cache_get(s, msblk->block_cache, block, 0); ++ bytes = entry->length - offset; ++ ++ if (entry->error || bytes < 1) { ++ return_length = 0; ++ goto finish; ++ } else if (bytes >= length) { ++ if (buffer) ++ memcpy(buffer, entry->data + offset, length); ++ if (entry->length - offset == length) { ++ *next_block = entry->next_index; ++ *next_offset = 0; ++ } else { ++ *next_block = block; ++ *next_offset = offset + length; ++ } ++ goto finish; ++ } else { ++ if (buffer) { ++ memcpy(buffer, entry->data + offset, bytes); ++ buffer = (char *) buffer + bytes; ++ } ++ block = entry->next_index; ++ squashfs_cache_put(msblk->block_cache, entry); ++ length -= bytes; ++ offset = 0; ++ } ++ } ++ ++finish: ++ squashfs_cache_put(msblk->block_cache, entry); ++ return return_length; ++} +diff -uNr a/fs/squashfs/cache.c b/fs/squashfs/cache.c +--- a/fs/squashfs/cache.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/cache.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,189 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * cache.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++struct squashfs_cache_entry *squashfs_cache_get(struct super_block *s, ++ struct squashfs_cache *cache, long long block, int length) ++{ ++ int i, n; ++ struct squashfs_cache_entry *entry; ++ ++ spin_lock(&cache->lock); ++ ++ while (1) { ++ for (i = 0; i < cache->entries && cache->entry[i].block != block; i++); ++ ++ if (i == cache->entries) { ++ if (cache->unused_blks == 0) { ++ cache->waiting ++; ++ spin_unlock(&cache->lock); ++ wait_event(cache->wait_queue, cache->unused_blks); ++ spin_lock(&cache->lock); ++ cache->waiting --; ++ continue; ++ } ++ ++ i = cache->next_blk; ++ for (n = 0; n < cache->entries; n++) { ++ if (cache->entry[i].locked == 0) ++ break; ++ i = (i + 1) % cache->entries; ++ } ++ ++ cache->next_blk = (i + 1) % cache->entries; ++ entry = &cache->entry[i]; ++ ++ cache->unused_blks --; ++ entry->block = block; ++ entry->locked = 1; ++ entry->pending = 1; ++ entry->waiting = 0; ++ entry->error = 0; ++ spin_unlock(&cache->lock); ++ ++ entry->length = squashfs_read_data(s, entry->data, ++ block, length, &entry->next_index, cache->block_size); ++ ++ spin_lock(&cache->lock); ++ ++ if (entry->length == 0) ++ entry->error = 1; ++ ++ entry->pending = 0; ++ spin_unlock(&cache->lock); ++ if (entry->waiting) ++ wake_up_all(&entry->wait_queue); ++ goto out; ++ } ++ ++ entry = &cache->entry[i]; ++ if (entry->locked == 0) ++ cache->unused_blks --; ++ entry->locked++; ++ ++ if (entry->pending) { ++ entry->waiting ++; ++ spin_unlock(&cache->lock); ++ wait_event(entry->wait_queue, !entry->pending); ++ goto out; ++ } ++ ++ spin_unlock(&cache->lock); ++ goto out; ++ } ++ ++out: ++ TRACE("Got %s %d, start block %lld, locked %d, error %d\n", i, ++ cache->name, entry->block, entry->locked, entry->error); ++ if (entry->error) ++ ERROR("Unable to read %s cache entry [%llx]\n", cache->name, block); ++ return entry; ++} ++ ++ ++void squashfs_cache_put(struct squashfs_cache *cache, ++ struct squashfs_cache_entry *entry) ++{ ++ spin_lock(&cache->lock); ++ entry->locked --; ++ if (entry->locked == 0) { ++ cache->unused_blks ++; ++ spin_unlock(&cache->lock); ++ if (cache->waiting) ++ wake_up(&cache->wait_queue); ++ } else ++ spin_unlock(&cache->lock); ++} ++ ++ ++void squashfs_cache_delete(struct squashfs_cache *cache) ++{ ++ int i; ++ ++ if (cache == NULL) ++ return; ++ ++ for (i = 0; i < cache->entries; i++) ++ if (cache->entry[i].data) { ++ if (cache->use_vmalloc) ++ vfree(cache->entry[i].data); ++ else ++ kfree(cache->entry[i].data); ++ } ++ ++ kfree(cache); ++} ++ ++ ++struct squashfs_cache *squashfs_cache_init(char *name, int entries, ++ int block_size, int use_vmalloc) ++{ ++ int i; ++ struct squashfs_cache *cache = kzalloc(sizeof(struct squashfs_cache) + ++ entries * sizeof(struct squashfs_cache_entry), GFP_KERNEL); ++ if (cache == NULL) { ++ ERROR("Failed to allocate %s cache\n", name); ++ goto failed; ++ } ++ ++ cache->next_blk = 0; ++ cache->unused_blks = entries; ++ cache->entries = entries; ++ cache->block_size = block_size; ++ cache->use_vmalloc = use_vmalloc; ++ cache->name = name; ++ cache->waiting = 0; ++ spin_lock_init(&cache->lock); ++ init_waitqueue_head(&cache->wait_queue); ++ ++ for (i = 0; i < entries; i++) { ++ init_waitqueue_head(&cache->entry[i].wait_queue); ++ cache->entry[i].block = SQUASHFS_INVALID_BLK; ++ cache->entry[i].data = use_vmalloc ? vmalloc(block_size) : ++ kmalloc(block_size, GFP_KERNEL); ++ if (cache->entry[i].data == NULL) { ++ ERROR("Failed to allocate %s cache entry\n", name); ++ goto cleanup; ++ } ++ } ++ ++ return cache; ++ ++cleanup: ++ squashfs_cache_delete(cache); ++failed: ++ return NULL; ++} +diff -uNr a/fs/squashfs/dir.c b/fs/squashfs/dir.c +--- a/fs/squashfs/dir.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/dir.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,216 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * dir.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++static const unsigned char squashfs_filetype_table[] = { ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK ++}; ++ ++static int get_dir_index_using_offset(struct super_block *s, ++ long long *next_block, unsigned int *next_offset, ++ long long index_start, unsigned int index_offset, int i_count, ++ long long f_pos) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index index; ++ ++ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n", ++ i_count, (unsigned int) f_pos); ++ ++ f_pos -= 3; ++ if (f_pos == 0) ++ goto finish; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, &sindex, index_start, index_offset, ++ sizeof(sindex), &index_start, &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(&index, &sindex); ++ } else ++ squashfs_get_cached_block(s, &index, index_start, index_offset, ++ sizeof(index), &index_start, &index_offset); ++ ++ if (index.index > f_pos) ++ break; ++ ++ squashfs_get_cached_block(s, NULL, index_start, index_offset, ++ index.size + 1, &index_start, &index_offset); ++ ++ length = index.index; ++ *next_block = index.start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ ++finish: ++ return length + 3; ++} ++ ++ ++static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct inode *i = file->f_dentry->d_inode; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, dir_count; ++ struct squashfs_dir_header dirh; ++ struct squashfs_dir_entry *dire; ++ ++ TRACE("Entered squashfs_readdir [%llx:%x]\n", next_block, next_offset); ++ ++ dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL); ++ if (dire == NULL) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto finish; ++ } ++ ++ while(file->f_pos < 3) { ++ char *name; ++ int size, i_ino; ++ ++ if(file->f_pos == 0) { ++ name = "."; ++ size = 1; ++ i_ino = i->i_ino; ++ } else { ++ name = ".."; ++ size = 2; ++ i_ino = SQUASHFS_I(i)->u.s2.parent_inode; ++ } ++ TRACE("Calling filldir(%x, %s, %d, %d, %d, %d)\n", ++ (unsigned int) dirent, name, size, (int) ++ file->f_pos, i_ino, squashfs_filetype_table[1]); ++ ++ if (filldir(dirent, name, size, file->f_pos, i_ino, ++ squashfs_filetype_table[1]) < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos += size; ++ } ++ ++ length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, file->f_pos); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header sdirh; ++ ++ if (!squashfs_get_cached_block(i->i_sb, &sdirh, next_block, ++ next_offset, sizeof(sdirh), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, &dirh, next_block, ++ next_offset, sizeof(dirh), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry sdire; ++ if (!squashfs_get_cached_block(i->i_sb, &sdire, next_block, ++ next_offset, sizeof(sdire), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, dire, next_block, ++ next_offset, sizeof(*dire), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, next_block, ++ next_offset, dire->size + 1, &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (file->f_pos >= length) ++ continue; ++ ++ dire->name[dire->size + 1] = '\0'; ++ ++ TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d, %d)\n", ++ (unsigned int) dirent, dire->name, dire->size + 1, ++ (int) file->f_pos, dirh.start_block, dire->offset, ++ dirh.inode_number + dire->inode_number, ++ squashfs_filetype_table[dire->type]); ++ ++ if (filldir(dirent, dire->name, dire->size + 1, file->f_pos, ++ dirh.inode_number + dire->inode_number, ++ squashfs_filetype_table[dire->type]) < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos = length; ++ } ++ } ++ ++finish: ++ kfree(dire); ++ return 0; ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ kfree(dire); ++ return 0; ++} ++ ++ ++const struct file_operations squashfs_dir_ops = { ++ .read = generic_read_dir, ++ .readdir = squashfs_readdir ++}; +diff -uNr a/fs/squashfs/export.c b/fs/squashfs/export.c +--- a/fs/squashfs/export.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/export.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,171 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * export.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++static squashfs_inode_t squashfs_inode_lookup(struct super_block *s, int ino) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start = msblk->inode_lookup_table[SQUASHFS_LOOKUP_BLOCK(ino - 1)]; ++ int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino - 1); ++ squashfs_inode_t inode; ++ ++ TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino); ++ ++ if (msblk->swap) { ++ squashfs_inode_t sinode; ++ ++ if (!squashfs_get_cached_block(s, &sinode, start, offset, ++ sizeof(sinode), &start, &offset)) ++ goto out; ++ SQUASHFS_SWAP_INODE_T((&inode), &sinode); ++ } else if (!squashfs_get_cached_block(s, &inode, start, offset, ++ sizeof(inode), &start, &offset)) ++ goto out; ++ ++ TRACE("squashfs_inode_lookup, inode = 0x%llx\n", inode); ++ ++ return inode; ++ ++out: ++ return SQUASHFS_INVALID_BLK; ++} ++ ++ ++static struct dentry *squashfs_export_iget(struct super_block *s, ++ unsigned int inode_number) ++{ ++ squashfs_inode_t inode; ++ struct inode *i; ++ struct dentry *dentry; ++ ++ TRACE("Entered squashfs_export_iget\n"); ++ ++ inode = squashfs_inode_lookup(s, inode_number); ++ if(inode == SQUASHFS_INVALID_BLK) { ++ dentry = ERR_PTR(-ENOENT); ++ goto failure; ++ } ++ ++ i = squashfs_iget(s, inode, inode_number); ++ if(i == NULL) { ++ dentry = ERR_PTR(-EACCES); ++ goto failure; ++ } ++ ++ dentry = d_alloc_anon(i); ++ if (dentry == NULL) { ++ iput(i); ++ dentry = ERR_PTR(-ENOMEM); ++ } ++ ++failure: ++ return dentry; ++} ++ ++ ++static struct dentry *squashfs_fh_to_dentry(struct super_block *s, ++ struct fid *fid, int fh_len, int fh_type) ++{ ++ if((fh_type != FILEID_INO32_GEN && fh_type != FILEID_INO32_GEN_PARENT) || ++ fh_len < 2) ++ return NULL; ++ ++ return squashfs_export_iget(s, fid->i32.ino); ++} ++ ++ ++static struct dentry *squashfs_fh_to_parent(struct super_block *s, ++ struct fid *fid, int fh_len, int fh_type) ++{ ++ if(fh_type != FILEID_INO32_GEN_PARENT || fh_len < 4) ++ return NULL; ++ ++ return squashfs_export_iget(s, fid->i32.parent_ino); ++} ++ ++ ++static struct dentry *squashfs_get_parent(struct dentry *child) ++{ ++ struct inode *i = child->d_inode; ++ ++ TRACE("Entered squashfs_get_parent\n"); ++ ++ return squashfs_export_iget(i->i_sb, SQUASHFS_I(i)->u.s2.parent_inode); ++} ++ ++ ++int read_inode_lookup_table(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(sblk->inodes); ++ ++ TRACE("In read_inode_lookup_table, length %d\n", length); ++ ++ /* Allocate inode lookup table */ ++ msblk->inode_lookup_table = kmalloc(length, GFP_KERNEL); ++ if (msblk->inode_lookup_table == NULL) { ++ ERROR("Failed to allocate inode lookup table\n"); ++ return 0; ++ } ++ ++ if (!squashfs_read_data(s, (char *) msblk->inode_lookup_table, ++ sblk->lookup_table_start, length | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) { ++ ERROR("unable to read inode lookup table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ long long block; ++ ++ for (i = 0; i < SQUASHFS_LOOKUP_BLOCKS(sblk->inodes); i++) { ++ SQUASHFS_SWAP_LOOKUP_BLOCKS((&block), ++ &msblk->inode_lookup_table[i], 1); ++ msblk->inode_lookup_table[i] = block; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++const struct export_operations squashfs_export_ops = { ++ .fh_to_dentry = squashfs_fh_to_dentry, ++ .fh_to_parent = squashfs_fh_to_parent, ++ .get_parent = squashfs_get_parent ++}; +diff -uNr a/fs/squashfs/file.c b/fs/squashfs/file.c +--- a/fs/squashfs/file.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/file.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,430 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * file.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++static struct meta_index *locate_meta_index(struct inode *inode, int index, int offset) ++{ ++ struct meta_index *meta = NULL; ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ int i; ++ ++ mutex_lock(&msblk->meta_index_mutex); ++ ++ TRACE("locate_meta_index: index %d, offset %d\n", index, offset); ++ ++ if (msblk->meta_index == NULL) ++ goto not_allocated; ++ ++ for (i = 0; i < SQUASHFS_META_NUMBER; i ++) { ++ if (msblk->meta_index[i].inode_number == inode->i_ino && ++ msblk->meta_index[i].offset >= offset && ++ msblk->meta_index[i].offset <= index && ++ msblk->meta_index[i].locked == 0) { ++ TRACE("locate_meta_index: entry %d, offset %d\n", i, ++ msblk->meta_index[i].offset); ++ meta = &msblk->meta_index[i]; ++ offset = meta->offset; ++ } ++ } ++ ++ if (meta) ++ meta->locked = 1; ++ ++not_allocated: ++ mutex_unlock(&msblk->meta_index_mutex); ++ ++ return meta; ++} ++ ++ ++static struct meta_index *empty_meta_index(struct inode *inode, int offset, int skip) ++{ ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct meta_index *meta = NULL; ++ int i; ++ ++ mutex_lock(&msblk->meta_index_mutex); ++ ++ TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); ++ ++ if (msblk->meta_index == NULL) { ++ msblk->meta_index = kmalloc(sizeof(struct meta_index) * ++ SQUASHFS_META_NUMBER, GFP_KERNEL); ++ if (msblk->meta_index == NULL) { ++ ERROR("Failed to allocate meta_index\n"); ++ goto failed; ++ } ++ for (i = 0; i < SQUASHFS_META_NUMBER; i++) { ++ msblk->meta_index[i].inode_number = 0; ++ msblk->meta_index[i].locked = 0; ++ } ++ msblk->next_meta_index = 0; ++ } ++ ++ for (i = SQUASHFS_META_NUMBER; i && ++ msblk->meta_index[msblk->next_meta_index].locked; i --) ++ msblk->next_meta_index = (msblk->next_meta_index + 1) % ++ SQUASHFS_META_NUMBER; ++ ++ if (i == 0) { ++ TRACE("empty_meta_index: failed!\n"); ++ goto failed; ++ } ++ ++ TRACE("empty_meta_index: returned meta entry %d, %p\n", ++ msblk->next_meta_index, ++ &msblk->meta_index[msblk->next_meta_index]); ++ ++ meta = &msblk->meta_index[msblk->next_meta_index]; ++ msblk->next_meta_index = (msblk->next_meta_index + 1) % ++ SQUASHFS_META_NUMBER; ++ ++ meta->inode_number = inode->i_ino; ++ meta->offset = offset; ++ meta->skip = skip; ++ meta->entries = 0; ++ meta->locked = 1; ++ ++failed: ++ mutex_unlock(&msblk->meta_index_mutex); ++ return meta; ++} ++ ++ ++static void release_meta_index(struct inode *inode, struct meta_index *meta) ++{ ++ meta->locked = 0; ++ smp_mb(); ++} ++ ++ ++static int read_block_index(struct super_block *s, int blocks, char *block_list, ++ long long *start_block, int *offset) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ unsigned int *block_listp; ++ int block = 0; ++ ++ if (msblk->swap) { ++ char sblock_list[blocks << 2]; ++ ++ if (!squashfs_get_cached_block(s, sblock_list, *start_block, ++ *offset, blocks << 2, start_block, offset)) { ++ ERROR("Fail reading block list [%llx:%x]\n", *start_block, *offset); ++ goto failure; ++ } ++ SQUASHFS_SWAP_INTS(((unsigned int *)block_list), ++ ((unsigned int *)sblock_list), blocks); ++ } else { ++ if (!squashfs_get_cached_block(s, block_list, *start_block, ++ *offset, blocks << 2, start_block, offset)) { ++ ERROR("Fail reading block list [%llx:%x]\n", *start_block, *offset); ++ goto failure; ++ } ++ } ++ ++ for (block_listp = (unsigned int *) block_list; blocks; ++ block_listp++, blocks --) ++ block += SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_listp); ++ ++ return block; ++ ++failure: ++ return -1; ++} ++ ++ ++#define SIZE 256 ++ ++static inline int calculate_skip(int blocks) { ++ int skip = (blocks - 1) / ((SQUASHFS_SLOTS * SQUASHFS_META_ENTRIES + 1) * SQUASHFS_META_INDEXES); ++ return skip >= 7 ? 7 : skip + 1; ++} ++ ++ ++static int get_meta_index(struct inode *inode, int index, ++ long long *index_block, int *index_offset, ++ long long *data_block, char *block_list) ++{ ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int skip = calculate_skip(i_size_read(inode) >> sblk->block_log); ++ int offset = 0; ++ struct meta_index *meta; ++ struct meta_entry *meta_entry; ++ long long cur_index_block = SQUASHFS_I(inode)->u.s1.block_list_start; ++ int cur_offset = SQUASHFS_I(inode)->offset; ++ long long cur_data_block = SQUASHFS_I(inode)->start_block; ++ int i; ++ ++ index /= SQUASHFS_META_INDEXES * skip; ++ ++ while (offset < index) { ++ meta = locate_meta_index(inode, index, offset + 1); ++ ++ if (meta == NULL) { ++ meta = empty_meta_index(inode, offset + 1, skip); ++ if (meta == NULL) ++ goto all_done; ++ } else { ++ if(meta->entries == 0) ++ goto failed; ++ /* XXX */ ++ offset = index < meta->offset + meta->entries ? index : ++ meta->offset + meta->entries - 1; ++ /* XXX */ ++ meta_entry = &meta->meta_entry[offset - meta->offset]; ++ cur_index_block = meta_entry->index_block + sblk->inode_table_start; ++ cur_offset = meta_entry->offset; ++ cur_data_block = meta_entry->data_block; ++ TRACE("get_meta_index: offset %d, meta->offset %d, " ++ "meta->entries %d\n", offset, meta->offset, meta->entries); ++ TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" ++ " data_block 0x%llx\n", cur_index_block, ++ cur_offset, cur_data_block); ++ } ++ ++ for (i = meta->offset + meta->entries; i <= index && ++ i < meta->offset + SQUASHFS_META_ENTRIES; i++) { ++ int blocks = skip * SQUASHFS_META_INDEXES; ++ ++ while (blocks) { ++ int block = blocks > (SIZE >> 2) ? (SIZE >> 2) : blocks; ++ int res = read_block_index(inode->i_sb, block, block_list, ++ &cur_index_block, &cur_offset); ++ ++ if (res == -1) ++ goto failed; ++ ++ cur_data_block += res; ++ blocks -= block; ++ } ++ ++ meta_entry = &meta->meta_entry[i - meta->offset]; ++ meta_entry->index_block = cur_index_block - sblk->inode_table_start; ++ meta_entry->offset = cur_offset; ++ meta_entry->data_block = cur_data_block; ++ meta->entries ++; ++ offset ++; ++ } ++ ++ TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", ++ meta->offset, meta->entries); ++ ++ release_meta_index(inode, meta); ++ } ++ ++all_done: ++ *index_block = cur_index_block; ++ *index_offset = cur_offset; ++ *data_block = cur_data_block; ++ ++ return offset * SQUASHFS_META_INDEXES * skip; ++ ++failed: ++ release_meta_index(inode, meta); ++ return -1; ++} ++ ++ ++long long read_blocklist(struct inode *inode, int index, ++ int readahead_blks, char *block_list, ++ unsigned short **block_p, unsigned int *bsize) ++{ ++ long long block_ptr; ++ int offset; ++ long long block; ++ int res = get_meta_index(inode, index, &block_ptr, &offset, &block, ++ block_list); ++ ++ TRACE("read_blocklist: res %d, index %d, block_ptr 0x%llx, offset" ++ " 0x%x, block 0x%llx\n", res, index, block_ptr, offset, block); ++ ++ if(res == -1) ++ goto failure; ++ ++ index -= res; ++ ++ while (index) { ++ int blocks = index > (SIZE >> 2) ? (SIZE >> 2) : index; ++ int res = read_block_index(inode->i_sb, blocks, block_list, ++ &block_ptr, &offset); ++ if (res == -1) ++ goto failure; ++ block += res; ++ index -= blocks; ++ } ++ ++ if (read_block_index(inode->i_sb, 1, block_list, &block_ptr, &offset) == -1) ++ goto failure; ++ *bsize = *((unsigned int *) block_list); ++ ++ return block; ++ ++failure: ++ return 0; ++} ++ ++ ++static int squashfs_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned char *block_list = NULL; ++ long long block; ++ unsigned int bsize, i; ++ int bytes; ++ int index = page->index >> (sblk->block_log - PAGE_CACHE_SHIFT); ++ void *pageaddr; ++ struct squashfs_cache_entry *fragment = NULL; ++ char *data_ptr = msblk->read_page; ++ ++ int mask = (1 << (sblk->block_log - PAGE_CACHE_SHIFT)) - 1; ++ int start_index = page->index & ~mask; ++ int end_index = start_index | mask; ++ int file_end = i_size_read(inode) >> sblk->block_log; ++ int sparse = 0; ++ ++ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", ++ page->index, SQUASHFS_I(inode)->start_block); ++ ++ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> ++ PAGE_CACHE_SHIFT)) ++ goto out; ++ ++ if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK ++ || index < file_end) { ++ block_list = kmalloc(SIZE, GFP_KERNEL); ++ if (block_list == NULL) { ++ ERROR("Failed to allocate block_list\n"); ++ goto error_out; ++ } ++ ++ block = (msblk->read_blocklist)(inode, index, 1, block_list, NULL, &bsize); ++ if (block == 0) ++ goto error_out; ++ ++ if (bsize == 0) { /* hole */ ++ bytes = index == file_end ? ++ (i_size_read(inode) & (sblk->block_size - 1)) : sblk->block_size; ++ sparse = 1; ++ } else { ++ mutex_lock(&msblk->read_page_mutex); ++ ++ bytes = squashfs_read_data(inode->i_sb, msblk->read_page, block, ++ bsize, NULL, sblk->block_size); ++ ++ if (bytes == 0) { ++ ERROR("Unable to read page, block %llx, size %x\n", block, bsize); ++ mutex_unlock(&msblk->read_page_mutex); ++ goto error_out; ++ } ++ } ++ } else { ++ fragment = get_cached_fragment(inode->i_sb, ++ SQUASHFS_I(inode)-> u.s1.fragment_start_block, ++ SQUASHFS_I(inode)->u.s1.fragment_size); ++ ++ if (fragment->error) { ++ ERROR("Unable to read page, block %llx, size %x\n", ++ SQUASHFS_I(inode)->u.s1.fragment_start_block, ++ (int) SQUASHFS_I(inode)->u.s1.fragment_size); ++ release_cached_fragment(msblk, fragment); ++ goto error_out; ++ } ++ bytes = i_size_read(inode) & (sblk->block_size - 1); ++ data_ptr = fragment->data + SQUASHFS_I(inode)->u.s1.fragment_offset; ++ } ++ ++ for (i = start_index; i <= end_index && bytes > 0; i++, ++ bytes -= PAGE_CACHE_SIZE, data_ptr += PAGE_CACHE_SIZE) { ++ struct page *push_page; ++ int avail = sparse ? 0 : min_t(unsigned int, bytes, PAGE_CACHE_SIZE); ++ ++ TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); ++ ++ push_page = (i == page->index) ? page : ++ grab_cache_page_nowait(page->mapping, i); ++ ++ if (!push_page) ++ continue; ++ ++ if (PageUptodate(push_page)) ++ goto skip_page; ++ ++ pageaddr = kmap_atomic(push_page, KM_USER0); ++ memcpy(pageaddr, data_ptr, avail); ++ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); ++ kunmap_atomic(pageaddr, KM_USER0); ++ flush_dcache_page(push_page); ++ SetPageUptodate(push_page); ++skip_page: ++ unlock_page(push_page); ++ if(i != page->index) ++ page_cache_release(push_page); ++ } ++ ++ if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK ++ || index < file_end) { ++ if (!sparse) ++ mutex_unlock(&msblk->read_page_mutex); ++ kfree(block_list); ++ } else ++ release_cached_fragment(msblk, fragment); ++ ++ return 0; ++ ++error_out: ++ SetPageError(page); ++out: ++ pageaddr = kmap_atomic(page, KM_USER0); ++ memset(pageaddr, 0, PAGE_CACHE_SIZE); ++ kunmap_atomic(pageaddr, KM_USER0); ++ flush_dcache_page(page); ++ if (!PageError(page)) ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ kfree(block_list); ++ return 0; ++} ++ ++ ++const struct address_space_operations squashfs_aops = { ++ .readpage = squashfs_readpage ++}; +diff -uNr a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c +--- a/fs/squashfs/fragment.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/fragment.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,122 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * fragment.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++int get_fragment_location(struct super_block *s, unsigned int fragment, ++ long long *fragment_start_block, ++ unsigned int *fragment_size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start_block = ++ msblk->fragment_index[SQUASHFS_FRAGMENT_INDEX(fragment)]; ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); ++ struct squashfs_fragment_entry fragment_entry; ++ ++ if (msblk->swap) { ++ struct squashfs_fragment_entry sfragment_entry; ++ ++ if (!squashfs_get_cached_block(s, &sfragment_entry, start_block, offset, ++ sizeof(sfragment_entry), &start_block, &offset)) ++ goto out; ++ SQUASHFS_SWAP_FRAGMENT_ENTRY(&fragment_entry, &sfragment_entry); ++ } else ++ if (!squashfs_get_cached_block(s, &fragment_entry, start_block, offset, ++ sizeof(fragment_entry), &start_block, &offset)) ++ goto out; ++ ++ *fragment_start_block = fragment_entry.start_block; ++ *fragment_size = fragment_entry.size; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++void release_cached_fragment(struct squashfs_sb_info *msblk, ++ struct squashfs_cache_entry *fragment) ++{ ++ squashfs_cache_put(msblk->fragment_cache, fragment); ++} ++ ++ ++struct squashfs_cache_entry *get_cached_fragment(struct super_block *s, ++ long long start_block, int length) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ ++ return squashfs_cache_get(s, msblk->fragment_cache, start_block, length); ++} ++ ++ ++int read_fragment_index_table(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(sblk->fragments); ++ ++ if(length == 0) ++ return 1; ++ ++ /* Allocate fragment index table */ ++ msblk->fragment_index = kmalloc(length, GFP_KERNEL); ++ if (msblk->fragment_index == NULL) { ++ ERROR("Failed to allocate fragment index table\n"); ++ return 0; ++ } ++ ++ if (!squashfs_read_data(s, (char *) msblk->fragment_index, ++ sblk->fragment_table_start, length | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) { ++ ERROR("unable to read fragment index table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ long long fragment; ++ ++ for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES(sblk->fragments); i++) { ++ SQUASHFS_SWAP_FRAGMENT_INDEXES((&fragment), ++ &msblk->fragment_index[i], 1); ++ msblk->fragment_index[i] = fragment; ++ } ++ } ++ ++ return 1; ++} +diff -uNr a/fs/squashfs/id.c b/fs/squashfs/id.c +--- a/fs/squashfs/id.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/id.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,97 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * id.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++int get_id(struct super_block *s, unsigned int index, unsigned int *id) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start_block = msblk->id_table[SQUASHFS_ID_BLOCK(index)]; ++ int offset = SQUASHFS_ID_BLOCK_OFFSET(index); ++ ++ if (msblk->swap) { ++ unsigned int sid; ++ ++ if (!squashfs_get_cached_block(s, &sid, start_block, offset, ++ sizeof(unsigned int), &start_block, &offset)) ++ goto out; ++ SQUASHFS_SWAP_INTS((&sid), id, 1); ++ } else ++ if (!squashfs_get_cached_block(s, id, start_block, offset, ++ sizeof(unsigned int), &start_block, &offset)) ++ goto out; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++int read_id_index_table(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int length = SQUASHFS_ID_BLOCK_BYTES(sblk->no_ids); ++ ++ TRACE("In read_id_index_table, length %d\n", length); ++ ++ /* Allocate id index table */ ++ msblk->id_table = kmalloc(length, GFP_KERNEL); ++ if (msblk->id_table == NULL) { ++ ERROR("Failed to allocate id index table\n"); ++ return 0; ++ } ++ ++ if (!squashfs_read_data(s, (char *) msblk->id_table, ++ sblk->id_table_start, length | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) { ++ ERROR("unable to read id index table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ long long block; ++ ++ for (i = 0; i < SQUASHFS_ID_BLOCKS(sblk->no_ids); i++) { ++ SQUASHFS_SWAP_ID_BLOCKS((&block), &msblk->id_table[i], 1); ++ msblk->id_table[i] = block; ++ } ++ } ++ ++ return 1; ++} +diff -uNr a/fs/squashfs/inode.c b/fs/squashfs/inode.c +--- a/fs/squashfs/inode.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/inode.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,340 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * inode.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++static int squashfs_new_inode(struct super_block *s, struct inode *i, ++ struct squashfs_base_inode_header *inodeb) ++{ ++ if(get_id(s, inodeb->uid, &i->i_uid) == 0) ++ goto out; ++ if(get_id(s, inodeb->guid, &i->i_gid) == 0) ++ goto out; ++ ++ i->i_ino = inodeb->inode_number; ++ i->i_mtime.tv_sec = inodeb->mtime; ++ i->i_atime.tv_sec = inodeb->mtime; ++ i->i_ctime.tv_sec = inodeb->mtime; ++ i->i_mode = inodeb->mode; ++ i->i_size = 0; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++struct inode *squashfs_iget(struct super_block *s, ++ squashfs_inode_t inode, unsigned int inode_number) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct inode *i = iget_locked(s, inode_number); ++ ++ TRACE("Entered squashfs_iget\n"); ++ ++ if(i && (i->i_state & I_NEW)) { ++ (msblk->read_inode)(i, inode); ++ unlock_new_inode(i); ++ } ++ ++ return i; ++} ++ ++ ++int squashfs_read_inode(struct inode *i, squashfs_inode_t inode) ++{ ++ struct super_block *s = i->i_sb; ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long block = SQUASHFS_INODE_BLK(inode) + sblk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ long long next_block; ++ unsigned int next_offset; ++ union squashfs_inode_header id, sid; ++ struct squashfs_base_inode_header *inodeb = &id.base, *sinodeb = &sid.base; ++ ++ TRACE("Entered squashfs_read_inode\n"); ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodeb, block, offset, ++ sizeof(*sinodeb), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER(inodeb, sinodeb, sizeof(*sinodeb)); ++ } else ++ if (!squashfs_get_cached_block(s, inodeb, block, offset, ++ sizeof(*inodeb), &next_block, &next_offset)) ++ goto failed_read; ++ ++ if(squashfs_new_inode(s, i, inodeb) == 0) ++ goto failed_read; ++ ++ switch(inodeb->inode_type) { ++ case SQUASHFS_FILE_TYPE: { ++ unsigned int frag_size; ++ long long frag_blk; ++ struct squashfs_reg_inode_header *inodep = &id.reg; ++ struct squashfs_reg_inode_header *sinodep = &sid.reg; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG) ++ if(!get_fragment_location(s, inodep->fragment, &frag_blk, ++ &frag_size)) ++ goto failed_read; ++ ++ i->i_nlink = 1; ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ i->i_data.a_ops = &squashfs_aops; ++ ++ TRACE("File inode %x:%x, start_block %llx, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_LREG_TYPE: { ++ unsigned int frag_size; ++ long long frag_blk; ++ struct squashfs_lreg_inode_header *inodep = &id.lreg; ++ struct squashfs_lreg_inode_header *sinodep = &sid.lreg; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LREG_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG) ++ if (!get_fragment_location(s, inodep->fragment, &frag_blk, ++ &frag_size)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_blocks = ((inodep->file_size - inodep->sparse - 1) >> 9) + 1; ++ ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ i->i_data.a_ops = &squashfs_aops; ++ ++ TRACE("File inode %x:%x, start_block %llx, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ struct squashfs_dir_inode_header *inodep = &id.dir; ++ struct squashfs_dir_inode_header *sinodep = &sid.dir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode; ++ ++ TRACE("Directory inode %x:%x, start_block %x, offset " ++ "%x\n", SQUASHFS_INODE_BLK(inode), ++ offset, inodep->start_block, ++ inodep->offset); ++ break; ++ } ++ case SQUASHFS_LDIR_TYPE: { ++ struct squashfs_ldir_inode_header *inodep = &id.ldir; ++ struct squashfs_ldir_inode_header *sinodep = &sid.ldir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LDIR_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block; ++ SQUASHFS_I(i)->u.s2.directory_index_offset = next_offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = inodep->i_count; ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode; ++ ++ TRACE("Long directory inode %x:%x, start_block %x, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, inodep->offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ struct squashfs_symlink_inode_header *inodep = &id.symlink; ++ struct squashfs_symlink_inode_header *sinodep = &sid.symlink; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ ++ TRACE("Symbolic link inode %x:%x, start_block %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ struct squashfs_dev_inode_header *inodep = &id.dev; ++ struct squashfs_dev_inode_header *sinodep = &sid.dev; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_CHRDEV_TYPE) ? ++ S_IFCHR : S_IFBLK; ++ init_special_inode(i, i->i_mode, old_decode_dev(inodep->rdev)); ++ ++ TRACE("Device inode %x:%x, rdev %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, inodep->rdev); ++ break; ++ } ++ case SQUASHFS_FIFO_TYPE: ++ case SQUASHFS_SOCKET_TYPE: { ++ struct squashfs_ipc_inode_header *inodep = &id.ipc; ++ struct squashfs_ipc_inode_header *sinodep = &sid.ipc; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, sinodep, block, offset, ++ sizeof(*sinodep), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_IPC_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, inodep, block, offset, ++ sizeof(*inodep), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE) ++ ? S_IFIFO : S_IFSOCK; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", ++ inodeb->inode_type); ++ goto failed_read1; ++ } ++ ++ return 1; ++ ++failed_read: ++ ERROR("Unable to read inode [%llx:%x]\n", block, offset); ++ ++failed_read1: ++ make_bad_inode(i); ++ return 0; ++} +diff -uNr a/fs/squashfs/Makefile b/fs/squashfs/Makefile +--- a/fs/squashfs/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/Makefile 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,8 @@ ++# ++# Makefile for the linux squashfs routines. ++# ++ ++obj-$(CONFIG_SQUASHFS) += squashfs.o ++squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o ++squashfs-y += namei.o super.o symlink.o ++#squashfs-y += squashfs2_0.o +diff -uNr a/fs/squashfs/namei.c b/fs/squashfs/namei.c +--- a/fs/squashfs/namei.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/namei.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,200 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * namei.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++static int get_dir_index_using_name(struct super_block *s, ++ long long *next_block, unsigned int *next_offset, ++ long long index_start, unsigned int index_offset, int i_count, ++ const char *name, int size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index *index; ++ char *str; ++ ++ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); ++ ++ str = kmalloc(sizeof(struct squashfs_dir_index) + ++ (SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL); ++ if (str == NULL) { ++ ERROR("Failed to allocate squashfs_dir_index\n"); ++ goto failure; ++ } ++ ++ index = (struct squashfs_dir_index *) (str + SQUASHFS_NAME_LEN + 1); ++ strncpy(str, name, size); ++ str[size] = '\0'; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, &sindex, index_start, index_offset, ++ sizeof(sindex), &index_start, &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(index, &sindex); ++ } else ++ squashfs_get_cached_block(s, index, index_start, index_offset, ++ sizeof(struct squashfs_dir_index), &index_start, &index_offset); ++ ++ squashfs_get_cached_block(s, index->name, index_start, index_offset, ++ index->size + 1, &index_start, &index_offset); ++ ++ index->name[index->size + 1] = '\0'; ++ ++ if (strcmp(index->name, str) > 0) ++ break; ++ ++ length = index->index; ++ *next_block = index->start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ kfree(str); ++ ++failure: ++ return length + 3; ++} ++ ++ ++static struct dentry *squashfs_lookup(struct inode *i, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ const unsigned char *name = dentry->d_name.name; ++ int len = dentry->d_name.len; ++ struct inode *inode = NULL; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, dir_count; ++ struct squashfs_dir_header dirh; ++ struct squashfs_dir_entry *dire; ++ ++ TRACE("Entered squashfs_lookup [%llx:%x]\n", next_block, next_offset); ++ ++ dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL); ++ if (dire == NULL) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto exit_lookup; ++ } ++ ++ if (len > SQUASHFS_NAME_LEN) ++ goto exit_lookup; ++ ++ length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, name, len); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header sdirh; ++ if (!squashfs_get_cached_block(i->i_sb, &sdirh, next_block, ++ next_offset, sizeof(sdirh), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, &dirh, next_block, ++ next_offset, sizeof(dirh), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry sdire; ++ if (!squashfs_get_cached_block(i->i_sb, &sdire, next_block, ++ next_offset, sizeof(sdire), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, dire, next_block, ++ next_offset, sizeof(*dire), &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, next_block, ++ next_offset, dire->size + 1, &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (name[0] < dire->name[0]) ++ goto exit_lookup; ++ ++ if ((len == dire->size + 1) && !strncmp(name, dire->name, len)) { ++ squashfs_inode_t ino = SQUASHFS_MKINODE(dirh.start_block, ++ dire->offset); ++ ++ TRACE("calling squashfs_iget for directory entry %s, inode" ++ " %x:%x, %d\n", name, dirh.start_block, dire->offset, ++ dirh.inode_number + dire->inode_number); ++ ++ inode = squashfs_iget(i->i_sb, ino, dirh.inode_number + dire->inode_number); ++ ++ goto exit_lookup; ++ } ++ } ++ } ++ ++exit_lookup: ++ kfree(dire); ++ if (inode) ++ return d_splice_alias(inode, dentry); ++ d_add(dentry, inode); ++ return ERR_PTR(0); ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ goto exit_lookup; ++} ++ ++ ++const struct inode_operations squashfs_dir_inode_ops = { ++ .lookup = squashfs_lookup ++}; +diff -uNr a/fs/squashfs/squashfs2_0.c b/fs/squashfs/squashfs2_0.c +--- a/fs/squashfs/squashfs2_0.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/squashfs2_0.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,740 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs2_0.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++ ++#include "squashfs.h" ++static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir); ++static struct dentry *squashfs_lookup_2(struct inode *, struct dentry *, ++ struct nameidata *); ++ ++static struct file_operations squashfs_dir_ops_2 = { ++ .read = generic_read_dir, ++ .readdir = squashfs_readdir_2 ++}; ++ ++static struct inode_operations squashfs_dir_inode_ops_2 = { ++ .lookup = squashfs_lookup_2 ++}; ++ ++static unsigned char squashfs_filetype_table[] = { ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK ++}; ++ ++static int read_fragment_index_table_2(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ if (!(msblk->fragment_index_2 = kmalloc(SQUASHFS_FRAGMENT_INDEX_BYTES_2 ++ (sblk->fragments), GFP_KERNEL))) { ++ ERROR("Failed to allocate uid/gid table\n"); ++ return 0; ++ } ++ ++ if (SQUASHFS_FRAGMENT_INDEX_BYTES_2(sblk->fragments) && ++ !squashfs_read_data(s, (char *) ++ msblk->fragment_index_2, ++ sblk->fragment_table_start, ++ SQUASHFS_FRAGMENT_INDEX_BYTES_2 ++ (sblk->fragments) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, SQUASHFS_FRAGMENT_INDEX_BYTES_2(sblk->fragments))) { ++ ERROR("unable to read fragment index table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ unsigned int fragment; ++ ++ for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES_2(sblk->fragments); ++ i++) { ++ SQUASHFS_SWAP_FRAGMENT_INDEXES_2((&fragment), ++ &msblk->fragment_index_2[i], 1); ++ msblk->fragment_index_2[i] = fragment; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++static int get_fragment_location_2(struct super_block *s, unsigned int fragment, ++ long long *fragment_start_block, ++ unsigned int *fragment_size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start_block = ++ msblk->fragment_index_2[SQUASHFS_FRAGMENT_INDEX_2(fragment)]; ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET_2(fragment); ++ struct squashfs_fragment_entry_2 fragment_entry; ++ ++ if (msblk->swap) { ++ struct squashfs_fragment_entry_2 sfragment_entry; ++ ++ if (!squashfs_get_cached_block(s, (char *) &sfragment_entry, ++ start_block, offset, ++ sizeof(sfragment_entry), &start_block, ++ &offset)) ++ goto out; ++ SQUASHFS_SWAP_FRAGMENT_ENTRY_2(&fragment_entry, &sfragment_entry); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) &fragment_entry, ++ start_block, offset, ++ sizeof(fragment_entry), &start_block, ++ &offset)) ++ goto out; ++ ++ *fragment_start_block = fragment_entry.start_block; ++ *fragment_size = fragment_entry.size; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i, ++ struct squashfs_base_inode_header_2 *inodeb, unsigned int ino) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ i->i_ino = ino; ++ i->i_mtime.tv_sec = sblk->mkfs_time; ++ i->i_atime.tv_sec = sblk->mkfs_time; ++ i->i_ctime.tv_sec = sblk->mkfs_time; ++ i->i_uid = msblk->uid[inodeb->uid]; ++ i->i_mode = inodeb->mode; ++ i->i_nlink = 1; ++ i->i_size = 0; ++ if (inodeb->guid == SQUASHFS_GUIDS) ++ i->i_gid = i->i_uid; ++ else ++ i->i_gid = msblk->guid[inodeb->guid]; ++} ++ ++ ++static int squashfs_read_inode_2(struct inode *i, squashfs_inode_t inode) ++{ ++ struct super_block *s = i->i_sb; ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int block = SQUASHFS_INODE_BLK(inode) + ++ sblk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ unsigned int ino = SQUASHFS_MK_VFS_INODE(block - ++ sblk->inode_table_start, offset); ++ long long next_block; ++ unsigned int next_offset; ++ union squashfs_inode_header_2 id, sid; ++ struct squashfs_base_inode_header_2 *inodeb = &id.base, ++ *sinodeb = &sid.base; ++ ++ TRACE("Entered squashfs_read_inode_2\n"); ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) sinodeb, block, ++ offset, sizeof(*sinodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER_2(inodeb, sinodeb, ++ sizeof(*sinodeb)); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) inodeb, block, ++ offset, sizeof(*inodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ squashfs_new_inode(msblk, i, inodeb, ino); ++ ++ switch(inodeb->inode_type) { ++ case SQUASHFS_FILE_TYPE: { ++ struct squashfs_reg_inode_header_2 *inodep = &id.reg; ++ struct squashfs_reg_inode_header_2 *sinodep = &sid.reg; ++ long long frag_blk; ++ unsigned int frag_size = 0; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG && ++ !get_fragment_location_2(s, ++ inodep->fragment, &frag_blk, &frag_size)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ i->i_ctime.tv_sec = inodep->mtime; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ i->i_data.a_ops = &squashfs_aops; ++ ++ TRACE("File inode %x:%x, start_block %x, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ struct squashfs_dir_inode_header_2 *inodep = &id.dir; ++ struct squashfs_dir_inode_header_2 *sinodep = &sid.dir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops_2; ++ i->i_fop = &squashfs_dir_ops_2; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ i->i_ctime.tv_sec = inodep->mtime; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ SQUASHFS_I(i)->u.s2.parent_inode = 0; ++ ++ TRACE("Directory inode %x:%x, start_block %x, offset " ++ "%x\n", SQUASHFS_INODE_BLK(inode), ++ offset, inodep->start_block, ++ inodep->offset); ++ break; ++ } ++ case SQUASHFS_LDIR_TYPE: { ++ struct squashfs_ldir_inode_header_2 *inodep = &id.ldir; ++ struct squashfs_ldir_inode_header_2 *sinodep = &sid.ldir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LDIR_INODE_HEADER_2(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops_2; ++ i->i_fop = &squashfs_dir_ops_2; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ i->i_ctime.tv_sec = inodep->mtime; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block; ++ SQUASHFS_I(i)->u.s2.directory_index_offset = ++ next_offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = ++ inodep->i_count; ++ SQUASHFS_I(i)->u.s2.parent_inode = 0; ++ ++ TRACE("Long directory inode %x:%x, start_block %x, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, inodep->offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ struct squashfs_symlink_inode_header_2 *inodep = ++ &id.symlink; ++ struct squashfs_symlink_inode_header_2 *sinodep = ++ &sid.symlink; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ ++ TRACE("Symbolic link inode %x:%x, start_block %llx, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ struct squashfs_dev_inode_header_2 *inodep = &id.dev; ++ struct squashfs_dev_inode_header_2 *sinodep = &sid.dev; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_mode |= (inodeb->inode_type == ++ SQUASHFS_CHRDEV_TYPE) ? S_IFCHR : ++ S_IFBLK; ++ init_special_inode(i, i->i_mode, ++ old_decode_dev(inodep->rdev)); ++ ++ TRACE("Device inode %x:%x, rdev %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->rdev); ++ break; ++ } ++ case SQUASHFS_FIFO_TYPE: ++ case SQUASHFS_SOCKET_TYPE: { ++ ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE) ++ ? S_IFIFO : S_IFSOCK; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", ++ inodeb->inode_type); ++ goto failed_read1; ++ } ++ ++ return 1; ++ ++failed_read: ++ ERROR("Unable to read inode [%x:%x]\n", block, offset); ++ ++failed_read1: ++ return 0; ++} ++ ++ ++static int get_dir_index_using_offset(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ long long f_pos) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index_2 index; ++ ++ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n", ++ i_count, (unsigned int) f_pos); ++ ++ if (f_pos == 0) ++ goto finish; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index_2 sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX_2(&index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) &index, ++ index_start, index_offset, ++ sizeof(index), &index_start, ++ &index_offset); ++ ++ if (index.index > f_pos) ++ break; ++ ++ squashfs_get_cached_block(s, NULL, index_start, index_offset, ++ index.size + 1, &index_start, ++ &index_offset); ++ ++ length = index.index; ++ *next_block = index.start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ ++finish: ++ return length; ++} ++ ++ ++static int get_dir_index_using_name(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ const char *name, int size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index_2 *index; ++ char *str; ++ ++ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); ++ ++ if (!(str = kmalloc(sizeof(struct squashfs_dir_index) + ++ (SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_index\n"); ++ goto failure; ++ } ++ ++ index = (struct squashfs_dir_index_2 *) (str + SQUASHFS_NAME_LEN + 1); ++ strncpy(str, name, size); ++ str[size] = '\0'; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index_2 sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX_2(index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) index, ++ index_start, index_offset, ++ sizeof(struct squashfs_dir_index_2), ++ &index_start, &index_offset); ++ ++ squashfs_get_cached_block(s, index->name, index_start, ++ index_offset, index->size + 1, ++ &index_start, &index_offset); ++ ++ index->name[index->size + 1] = '\0'; ++ ++ if (strcmp(index->name, str) > 0) ++ break; ++ ++ length = index->index; ++ *next_block = index->start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ kfree(str); ++failure: ++ return length; ++} ++ ++ ++static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct inode *i = file->f_dentry->d_inode; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header_2 dirh; ++ struct squashfs_dir_entry_2 *dire; ++ ++ TRACE("Entered squashfs_readdir_2 [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto finish; ++ } ++ ++ length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, ++ file->f_pos); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header_2 sdirh; ++ ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry_2 sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block, next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block, next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, ++ dire->size + 1, &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (file->f_pos >= length) ++ continue; ++ ++ dire->name[dire->size + 1] = '\0'; ++ ++ TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d)\n", ++ (unsigned int) dirent, dire->name, ++ dire->size + 1, (int) file->f_pos, ++ dirh.start_block, dire->offset, ++ squashfs_filetype_table[dire->type]); ++ ++ if (filldir(dirent, dire->name, dire->size + 1, ++ file->f_pos, SQUASHFS_MK_VFS_INODE( ++ dirh.start_block, dire->offset), ++ squashfs_filetype_table[dire->type]) ++ < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos = length; ++ } ++ } ++ ++finish: ++ kfree(dire); ++ return 0; ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ kfree(dire); ++ return 0; ++} ++ ++ ++static struct dentry *squashfs_lookup_2(struct inode *i, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ const unsigned char *name = dentry->d_name.name; ++ int len = dentry->d_name.len; ++ struct inode *inode = NULL; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header_2 dirh; ++ struct squashfs_dir_entry_2 *dire; ++ int sorted = sblk->s_major == 2 && sblk->s_minor >= 1; ++ ++ TRACE("Entered squashfs_lookup_2 [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto exit_loop; ++ } ++ ++ if (len > SQUASHFS_NAME_LEN) ++ goto exit_loop; ++ ++ length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, name, ++ len); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header_2 sdirh; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry_2 sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block,next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block,next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, dire->size + 1, ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (sorted && name[0] < dire->name[0]) ++ goto exit_loop; ++ ++ if ((len == dire->size + 1) && !strncmp(name, ++ dire->name, len)) { ++ squashfs_inode_t ino = ++ SQUASHFS_MKINODE(dirh.start_block, ++ dire->offset); ++ unsigned int inode_number = SQUASHFS_MK_VFS_INODE(dirh.start_block, ++ dire->offset); ++ ++ TRACE("calling squashfs_iget for directory " ++ "entry %s, inode %x:%x, %lld\n", name, ++ dirh.start_block, dire->offset, ino); ++ ++ inode = squashfs_iget(i->i_sb, ino, inode_number); ++ ++ goto exit_loop; ++ } ++ } ++ } ++ ++exit_loop: ++ kfree(dire); ++ d_add(dentry, inode); ++ return ERR_PTR(0); ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ goto exit_loop; ++} ++ ++ ++int squashfs_2_0_supported(struct squashfs_sb_info *msblk) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ msblk->read_inode = squashfs_read_inode_2; ++ msblk->read_fragment_index_table = read_fragment_index_table_2; ++ ++ sblk->bytes_used = sblk->bytes_used_2; ++ sblk->uid_start = sblk->uid_start_2; ++ sblk->guid_start = sblk->guid_start_2; ++ sblk->inode_table_start = sblk->inode_table_start_2; ++ sblk->directory_table_start = sblk->directory_table_start_2; ++ sblk->fragment_table_start = sblk->fragment_table_start_2; ++ ++ return 1; ++} +diff -uNr a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h +--- a/fs/squashfs/squashfs.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/squashfs.h 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,122 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs.h ++ */ ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++#undef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++#endif ++ ++#ifdef SQUASHFS_TRACE ++#define TRACE(s, args...) printk(KERN_NOTICE "SQUASHFS: "s, ## args) ++#else ++#define TRACE(s, args...) {} ++#endif ++ ++#define ERROR(s, args...) printk(KERN_ERR "SQUASHFS error: "s, ## args) ++ ++#define SERROR(s, args...) do { \ ++ if (!silent) \ ++ printk(KERN_ERR "SQUASHFS error: "s, ## args);\ ++ } while(0) ++ ++#define WARNING(s, args...) printk(KERN_WARNING "SQUASHFS: "s, ## args) ++ ++static inline struct squashfs_inode_info *SQUASHFS_I(struct inode *inode) ++{ ++ return list_entry(inode, struct squashfs_inode_info, vfs_inode); ++} ++ ++/* block.c */ ++extern unsigned int squashfs_read_data(struct super_block *, char *, ++ long long, unsigned int, long long *, int); ++extern int squashfs_get_cached_block(struct super_block *, void *, ++ long long, unsigned int, int, long long *, unsigned int *); ++ ++/* cache.c */ ++extern struct squashfs_cache_entry *squashfs_cache_get(struct super_block *, ++ struct squashfs_cache *, long long, int); ++extern void squashfs_cache_put(struct squashfs_cache *, ++ struct squashfs_cache_entry *); ++extern void squashfs_cache_delete(struct squashfs_cache *); ++extern struct squashfs_cache *squashfs_cache_init(char *, int, int, int); ++ ++/* export.c */ ++extern int read_inode_lookup_table(struct super_block *); ++ ++/* file.c */ ++extern long long read_blocklist(struct inode *, int, int, char *, ++ unsigned short **, unsigned int *); ++ ++/* fragment.c */ ++extern int get_fragment_location(struct super_block *, unsigned int, ++ long long *, unsigned int *); ++extern void release_cached_fragment(struct squashfs_sb_info *, ++ struct squashfs_cache_entry *); ++extern struct squashfs_cache_entry *get_cached_fragment(struct super_block *, ++ long long, int); ++extern int read_fragment_index_table(struct super_block *); ++ ++/* id.c */ ++extern int get_id(struct super_block *, unsigned int, unsigned int *); ++extern int read_id_index_table(struct super_block *); ++ ++/* inode.c */ ++extern struct inode *squashfs_iget(struct super_block *, squashfs_inode_t, ++ unsigned int); ++extern int squashfs_read_inode(struct inode *, squashfs_inode_t); ++ ++/* ++ * Inodes and files operations ++ */ ++ ++/* dir.c */ ++extern const struct file_operations squashfs_dir_ops; ++ ++/* export.c */ ++extern const struct export_operations squashfs_export_ops; ++ ++/* file.c */ ++extern const struct address_space_operations squashfs_aops; ++ ++/* namei.c */ ++extern const struct inode_operations squashfs_dir_inode_ops; ++ ++/* symlink.c */ ++extern const struct address_space_operations squashfs_symlink_aops; ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++extern int squashfs_1_0_supported(struct squashfs_sb_info *msblk); ++#else ++static inline int squashfs_1_0_supported(struct squashfs_sb_info *msblk) ++{ ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++extern int squashfs_2_0_supported(struct squashfs_sb_info *msblk); ++#else ++static inline int squashfs_2_0_supported(struct squashfs_sb_info *msblk) ++{ ++ return 0; ++} ++#endif +diff -uNr a/fs/squashfs/super.c b/fs/squashfs/super.c +--- a/fs/squashfs/super.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/super.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,381 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * super.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/exportfs.h> ++ ++#include "squashfs.h" ++ ++static struct file_system_type squashfs_fs_type; ++static struct super_operations squashfs_super_ops; ++ ++static int supported_squashfs_filesystem(struct squashfs_sb_info *msblk, int silent) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ msblk->read_inode = squashfs_read_inode; ++ msblk->read_blocklist = read_blocklist; ++ msblk->read_fragment_index_table = read_fragment_index_table; ++ ++ if (sblk->s_major == 1) { ++ if (!squashfs_1_0_supported(msblk)) { ++ SERROR("Major/Minor mismatch, Squashfs 1.0 filesystems " ++ "are unsupported\n"); ++ SERROR("Please recompile with Squashfs 1.0 support enabled\n"); ++ return 0; ++ } ++ } else if (sblk->s_major == 2) { ++ if (!squashfs_2_0_supported(msblk)) { ++ SERROR("Major/Minor mismatch, Squashfs 2.0 filesystems " ++ "are unsupported\n"); ++ SERROR("Please recompile with Squashfs 2.0 support enabled\n"); ++ return 0; ++ } ++ } else if(sblk->s_major != SQUASHFS_MAJOR || sblk->s_minor > ++ SQUASHFS_MINOR) { ++ SERROR("Major/Minor mismatch, trying to mount newer %d.%d " ++ "filesystem\n", sblk->s_major, sblk->s_minor); ++ SERROR("Please update your kernel\n"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++ ++static int squashfs_fill_super(struct super_block *s, void *data, int silent) ++{ ++ struct squashfs_sb_info *msblk; ++ struct squashfs_super_block *sblk; ++ char b[BDEVNAME_SIZE]; ++ struct inode *root; ++ ++ TRACE("Entered squashfs_fill_superblock\n"); ++ ++ s->s_fs_info = kzalloc(sizeof(struct squashfs_sb_info), GFP_KERNEL); ++ if (s->s_fs_info == NULL) { ++ ERROR("Failed to allocate superblock\n"); ++ goto failure; ++ } ++ msblk = s->s_fs_info; ++ ++ msblk->stream.workspace = vmalloc(zlib_inflate_workspacesize()); ++ if (msblk->stream.workspace == NULL) { ++ ERROR("Failed to allocate zlib workspace\n"); ++ goto failure; ++ } ++ sblk = &msblk->sblk; ++ ++ msblk->devblksize = sb_min_blocksize(s, BLOCK_SIZE); ++ msblk->devblksize_log2 = ffz(~msblk->devblksize); ++ ++ mutex_init(&msblk->read_data_mutex); ++ mutex_init(&msblk->read_page_mutex); ++ mutex_init(&msblk->meta_index_mutex); ++ ++ /* sblk->bytes_used is checked in squashfs_read_data to ensure reads are not ++ * beyond filesystem end. As we're using squashfs_read_data to read sblk here, ++ * first set sblk->bytes_used to a useful value */ ++ sblk->bytes_used = sizeof(struct squashfs_super_block); ++ if (!squashfs_read_data(s, (char *) sblk, SQUASHFS_START, ++ sizeof(struct squashfs_super_block) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, sizeof(struct squashfs_super_block))) { ++ SERROR("unable to read superblock\n"); ++ goto failed_mount; ++ } ++ ++ /* Check it is a SQUASHFS superblock */ ++ if ((s->s_magic = sblk->s_magic) != SQUASHFS_MAGIC) { ++ if (sblk->s_magic == SQUASHFS_MAGIC_SWAP) { ++ struct squashfs_super_block ssblk; ++ ++ WARNING("Mounting a different endian SQUASHFS filesystem on %s\n", ++ bdevname(s->s_bdev, b)); ++ ++ //SQUASHFS_SWAP_SUPER_BLOCK(&ssblk, sblk); ++ memcpy(sblk, &ssblk, sizeof(struct squashfs_super_block)); ++ msblk->swap = 1; ++ } else { ++ SERROR("Can't find a SQUASHFS superblock on %s\n", ++ bdevname(s->s_bdev, b)); ++ goto failed_mount; ++ } ++ } ++ ++ /* Check the MAJOR & MINOR versions */ ++ if(!supported_squashfs_filesystem(msblk, silent)) ++ goto failed_mount; ++ ++ /* Check the filesystem does not extend beyond the end of the ++ block device */ ++ if(sblk->bytes_used < 0 || sblk->bytes_used > i_size_read(s->s_bdev->bd_inode)) ++ goto failed_mount; ++ ++ /* Check the root inode for sanity */ ++ if (SQUASHFS_INODE_OFFSET(sblk->root_inode) > SQUASHFS_METADATA_SIZE) ++ goto failed_mount; ++ ++ TRACE("Found valid superblock on %s\n", bdevname(s->s_bdev, b)); ++ TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(sblk->flags) ++ ? "un" : ""); ++ TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(sblk->flags) ++ ? "un" : ""); ++ TRACE("Check data is %spresent in the filesystem\n", ++ SQUASHFS_CHECK_DATA(sblk->flags) ? "" : "not "); ++ TRACE("Filesystem size %lld bytes\n", sblk->bytes_used); ++ TRACE("Block size %d\n", sblk->block_size); ++ TRACE("Number of inodes %d\n", sblk->inodes); ++ if (sblk->s_major > 1) ++ TRACE("Number of fragments %d\n", sblk->fragments); ++ TRACE("Number of ids %d\n", sblk->no_ids); ++ TRACE("sblk->inode_table_start %llx\n", sblk->inode_table_start); ++ TRACE("sblk->directory_table_start %llx\n", sblk->directory_table_start); ++ if (sblk->s_major > 1) ++ TRACE("sblk->fragment_table_start %llx\n", sblk->fragment_table_start); ++ TRACE("sblk->id_table_start %llx\n", sblk->id_table_start); ++ ++ s->s_maxbytes = MAX_LFS_FILESIZE; ++ s->s_flags |= MS_RDONLY; ++ s->s_op = &squashfs_super_ops; ++ ++ msblk->block_cache = squashfs_cache_init("metadata", SQUASHFS_CACHED_BLKS, ++ SQUASHFS_METADATA_SIZE, 0); ++ if (msblk->block_cache == NULL) ++ goto failed_mount; ++ ++ /* Allocate read_page block */ ++ msblk->read_page = vmalloc(sblk->block_size); ++ if (msblk->read_page == NULL) { ++ ERROR("Failed to allocate read_page block\n"); ++ goto failed_mount; ++ } ++ ++ /* Allocate and read id index table */ ++ if (read_id_index_table(s) == 0) ++ goto failed_mount; ++ ++ if (sblk->s_major == 1 && squashfs_1_0_supported(msblk)) ++ goto allocate_root; ++ ++ msblk->fragment_cache = squashfs_cache_init("fragment", ++ SQUASHFS_CACHED_FRAGMENTS, sblk->block_size, 1); ++ if (msblk->fragment_cache == NULL) ++ goto failed_mount; ++ ++ /* Allocate and read fragment index table */ ++ if (msblk->read_fragment_index_table(s) == 0) ++ goto failed_mount; ++ ++ if(sblk->s_major < 3 || sblk->lookup_table_start == SQUASHFS_INVALID_BLK) ++ goto allocate_root; ++ ++ /* Allocate and read inode lookup table */ ++ if (read_inode_lookup_table(s) == 0) ++ goto failed_mount; ++ ++ s->s_export_op = &squashfs_export_ops; ++ ++allocate_root: ++ root = new_inode(s); ++ if ((msblk->read_inode)(root, sblk->root_inode) == 0) ++ goto failed_mount; ++ insert_inode_hash(root); ++ ++ s->s_root = d_alloc_root(root); ++ if (s->s_root == NULL) { ++ ERROR("Root inode create failed\n"); ++ iput(root); ++ goto failed_mount; ++ } ++ ++ TRACE("Leaving squashfs_fill_super\n"); ++ return 0; ++ ++failed_mount: ++ kfree(msblk->inode_lookup_table); ++ kfree(msblk->fragment_index); ++ squashfs_cache_delete(msblk->fragment_cache); ++ kfree(msblk->id_table); ++ vfree(msblk->read_page); ++ squashfs_cache_delete(msblk->block_cache); ++ kfree(msblk->fragment_index_2); ++ vfree(msblk->stream.workspace); ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ return -EINVAL; ++ ++failure: ++ return -ENOMEM; ++} ++ ++ ++static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ TRACE("Entered squashfs_statfs\n"); ++ ++ buf->f_type = SQUASHFS_MAGIC; ++ buf->f_bsize = sblk->block_size; ++ buf->f_blocks = ((sblk->bytes_used - 1) >> sblk->block_log) + 1; ++ buf->f_bfree = buf->f_bavail = 0; ++ buf->f_files = sblk->inodes; ++ buf->f_ffree = 0; ++ buf->f_namelen = SQUASHFS_NAME_LEN; ++ ++ return 0; ++} ++ ++ ++static int squashfs_remount(struct super_block *s, int *flags, char *data) ++{ ++ *flags |= MS_RDONLY; ++ return 0; ++} ++ ++ ++static void squashfs_put_super(struct super_block *s) ++{ ++ if (s->s_fs_info) { ++ struct squashfs_sb_info *sbi = s->s_fs_info; ++ squashfs_cache_delete(sbi->block_cache); ++ squashfs_cache_delete(sbi->fragment_cache); ++ vfree(sbi->read_page); ++ kfree(sbi->id_table); ++ kfree(sbi->fragment_index); ++ kfree(sbi->fragment_index_2); ++ kfree(sbi->meta_index); ++ vfree(sbi->stream.workspace); ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ } ++} ++ ++ ++static int squashfs_get_sb(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *data, struct vfsmount *mnt) ++{ ++ return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, ++ mnt); ++} ++ ++ ++static struct kmem_cache * squashfs_inode_cachep; ++ ++ ++static void init_once(void *foo) ++{ ++ struct squashfs_inode_info *ei = foo; ++ ++ inode_init_once(&ei->vfs_inode); ++} ++ ++ ++static int __init init_inodecache(void) ++{ ++ squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", ++ sizeof(struct squashfs_inode_info), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, init_once); ++ if (squashfs_inode_cachep == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++ ++static void destroy_inodecache(void) ++{ ++ kmem_cache_destroy(squashfs_inode_cachep); ++} ++ ++ ++static int __init init_squashfs_fs(void) ++{ ++ int err = init_inodecache(); ++ if (err) ++ goto out; ++ ++ printk(KERN_INFO "squashfs: version 4.0-CVS (2008/07/27) " ++ "Phillip Lougher\n"); ++ ++ err = register_filesystem(&squashfs_fs_type); ++ if (err) ++ destroy_inodecache(); ++ ++out: ++ return err; ++} ++ ++ ++static void __exit exit_squashfs_fs(void) ++{ ++ unregister_filesystem(&squashfs_fs_type); ++ destroy_inodecache(); ++} ++ ++ ++static struct inode *squashfs_alloc_inode(struct super_block *sb) ++{ ++ struct squashfs_inode_info *ei; ++ ei = kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL); ++ return ei ? &ei->vfs_inode : NULL; ++} ++ ++ ++static void squashfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(squashfs_inode_cachep, SQUASHFS_I(inode)); ++} ++ ++ ++static struct file_system_type squashfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "squashfs", ++ .get_sb = squashfs_get_sb, ++ .kill_sb = kill_block_super, ++ .fs_flags = FS_REQUIRES_DEV ++}; ++ ++static struct super_operations squashfs_super_ops = { ++ .alloc_inode = squashfs_alloc_inode, ++ .destroy_inode = squashfs_destroy_inode, ++ .statfs = squashfs_statfs, ++ .put_super = squashfs_put_super, ++ .remount_fs = squashfs_remount ++}; ++ ++module_init(init_squashfs_fs); ++module_exit(exit_squashfs_fs); ++MODULE_DESCRIPTION("squashfs 4.0-CVS, a compressed read-only filesystem"); ++MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>"); ++MODULE_LICENSE("GPL"); +diff -uNr a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c +--- a/fs/squashfs/symlink.c 1969-12-31 16:00:00.000000000 -0800 ++++ b/fs/squashfs/symlink.c 2008-08-13 16:14:50.000000000 -0700 +@@ -0,0 +1,85 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * symlink.c ++ */ ++ ++#include <linux/fs.h> ++#include <linux/vfs.h> ++#include <linux/zlib.h> ++#include <linux/buffer_head.h> ++#include <linux/squashfs_fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++ ++#include "squashfs.h" ++ ++static int squashfs_symlink_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ int index = page->index << PAGE_CACHE_SHIFT; ++ long long block = SQUASHFS_I(inode)->start_block; ++ int offset = SQUASHFS_I(inode)->offset; ++ void *pageaddr = kmap(page); ++ int length, bytes, avail_bytes; ++ ++ TRACE("Entered squashfs_symlink_readpage, page index %ld, start block " ++ "%llx, offset %x\n", page->index, ++ SQUASHFS_I(inode)->start_block, ++ SQUASHFS_I(inode)->offset); ++ ++ for (length = 0; length < index; length += bytes) { ++ bytes = squashfs_get_cached_block(inode->i_sb, NULL, block, ++ offset, PAGE_CACHE_SIZE, &block, &offset); ++ if (bytes == 0) { ++ ERROR("Unable to read symbolic link [%llx:%x]\n", ++ block, offset); ++ goto skip_read; ++ } ++ } ++ ++ if (length != index) { ++ ERROR("(squashfs_symlink_readpage) length != index\n"); ++ bytes = 0; ++ goto skip_read; ++ } ++ ++ avail_bytes = min_t(int, i_size_read(inode) - length, PAGE_CACHE_SIZE); ++ ++ bytes = squashfs_get_cached_block(inode->i_sb, pageaddr, block, offset, ++ avail_bytes, &block, &offset); ++ if (bytes == 0) ++ ERROR("Unable to read symbolic link [%llx:%x]\n", block, ++ offset); ++ ++skip_read: ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++ ++ ++const struct address_space_operations squashfs_symlink_aops = { ++ .readpage = squashfs_symlink_readpage ++}; +diff -uNr a/include/linux/squashfs_fs.h b/include/linux/squashfs_fs.h +--- a/include/linux/squashfs_fs.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/squashfs_fs.h 2008-08-13 16:16:05.000000000 -0700 +@@ -0,0 +1,949 @@ ++#ifndef SQUASHFS_FS ++#define SQUASHFS_FS ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs.h ++ */ ++ ++#if 0 ++#ifndef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++#define CONFIG_SQUASHFS_2_0_COMPATIBILITY ++#endif ++#endif ++ ++#define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE ++#define SQUASHFS_MAJOR 4 ++#define SQUASHFS_MINOR 0 ++#define SQUASHFS_MAGIC 0x73717368 ++#define SQUASHFS_MAGIC_SWAP 0x68737173 ++#define SQUASHFS_START 0 ++ ++/* size of metadata (inode and directory) blocks */ ++#define SQUASHFS_METADATA_SIZE 8192 ++#define SQUASHFS_METADATA_LOG 13 ++ ++/* default size of data blocks */ ++#define SQUASHFS_FILE_SIZE 131072 ++#define SQUASHFS_FILE_LOG 17 ++ ++#define SQUASHFS_FILE_MAX_SIZE 1048576 ++ ++/* Max number of uids and gids */ ++#define SQUASHFS_IDS 65536 ++ ++/* Max length of filename (not 255) */ ++#define SQUASHFS_NAME_LEN 256 ++ ++#define SQUASHFS_INVALID ((long long) 0xffffffffffff) ++#define SQUASHFS_INVALID_FRAG ((unsigned int) 0xffffffff) ++#define SQUASHFS_INVALID_BLK ((long long) -1) ++#define SQUASHFS_USED_BLK ((long long) -2) ++ ++/* Filesystem flags */ ++#define SQUASHFS_NOI 0 ++#define SQUASHFS_NOD 1 ++#define SQUASHFS_CHECK 2 ++#define SQUASHFS_NOF 3 ++#define SQUASHFS_NO_FRAG 4 ++#define SQUASHFS_ALWAYS_FRAG 5 ++#define SQUASHFS_DUPLICATE 6 ++#define SQUASHFS_EXPORT 7 ++ ++#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) ++ ++#define SQUASHFS_UNCOMPRESSED_INODES(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOI) ++ ++#define SQUASHFS_UNCOMPRESSED_DATA(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOD) ++ ++#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOF) ++ ++#define SQUASHFS_NO_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NO_FRAG) ++ ++#define SQUASHFS_ALWAYS_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_ALWAYS_FRAG) ++ ++#define SQUASHFS_DUPLICATES(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_DUPLICATE) ++ ++#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_EXPORT) ++ ++#define SQUASHFS_CHECK_DATA(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_CHECK) ++ ++#define SQUASHFS_MKFLAGS(noi, nod, check_data, nof, no_frag, always_frag, \ ++ duplicate_checking, exportable) (noi | (nod << 1) | (check_data << 2) \ ++ | (nof << 3) | (no_frag << 4) | (always_frag << 5) | \ ++ (duplicate_checking << 6) | (exportable << 7)) ++ ++/* Max number of types and file types */ ++#define SQUASHFS_DIR_TYPE 1 ++#define SQUASHFS_FILE_TYPE 2 ++#define SQUASHFS_SYMLINK_TYPE 3 ++#define SQUASHFS_BLKDEV_TYPE 4 ++#define SQUASHFS_CHRDEV_TYPE 5 ++#define SQUASHFS_FIFO_TYPE 6 ++#define SQUASHFS_SOCKET_TYPE 7 ++#define SQUASHFS_LDIR_TYPE 8 ++#define SQUASHFS_LREG_TYPE 9 ++ ++/* 1.0 filesystem type definitions */ ++#define SQUASHFS_TYPES 5 ++#define SQUASHFS_IPC_TYPE 0 ++ ++/* Flag whether block is compressed or uncompressed, bit is set if block is ++ * uncompressed */ ++#define SQUASHFS_COMPRESSED_BIT (1 << 15) ++ ++#define SQUASHFS_COMPRESSED_SIZE(B) (((B) & ~SQUASHFS_COMPRESSED_BIT) ? \ ++ (B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT) ++ ++#define SQUASHFS_COMPRESSED(B) (!((B) & SQUASHFS_COMPRESSED_BIT)) ++ ++#define SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24) ++ ++#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B) ((B) & \ ++ ~SQUASHFS_COMPRESSED_BIT_BLOCK) ++ ++#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) ++ ++/* ++ * Inode number ops. Inodes consist of a compressed block number, and an ++ * uncompressed offset within that block ++ */ ++#define SQUASHFS_INODE_BLK(a) ((unsigned int) ((a) >> 16)) ++ ++#define SQUASHFS_INODE_OFFSET(a) ((unsigned int) ((a) & 0xffff)) ++ ++#define SQUASHFS_MKINODE(A, B) ((squashfs_inode_t)(((squashfs_inode_t) (A)\ ++ << 16) + (B))) ++ ++/* Compute 32 bit VFS inode number from squashfs inode number */ ++#define SQUASHFS_MK_VFS_INODE(a, b) ((unsigned int) (((a) << 8) + \ ++ ((b) >> 2) + 1)) ++/* XXX */ ++ ++/* Translate between VFS mode and squashfs mode */ ++#define SQUASHFS_MODE(a) ((a) & 0xfff) ++ ++/* fragment and fragment table defines */ ++#define SQUASHFS_FRAGMENT_BYTES(A) ((A) * sizeof(struct squashfs_fragment_entry)) ++ ++#define SQUASHFS_FRAGMENT_INDEX(A) (SQUASHFS_FRAGMENT_BYTES(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A) (SQUASHFS_FRAGMENT_BYTES(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEXES(A) ((SQUASHFS_FRAGMENT_BYTES(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_BYTES(A) (SQUASHFS_FRAGMENT_INDEXES(A) *\ ++ sizeof(long long)) ++ ++/* inode lookup table defines */ ++#define SQUASHFS_LOOKUP_BYTES(A) ((A) * sizeof(squashfs_inode_t)) ++ ++#define SQUASHFS_LOOKUP_BLOCK(A) (SQUASHFS_LOOKUP_BYTES(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCK_OFFSET(A) (SQUASHFS_LOOKUP_BYTES(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCKS(A) ((SQUASHFS_LOOKUP_BYTES(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCK_BYTES(A) (SQUASHFS_LOOKUP_BLOCKS(A) *\ ++ sizeof(long long)) ++ ++/* uid lookup table defines */ ++#define SQUASHFS_ID_BYTES(A) ((A) * sizeof(unsigned int)) ++ ++#define SQUASHFS_ID_BLOCK(A) (SQUASHFS_ID_BYTES(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_ID_BLOCK_OFFSET(A) (SQUASHFS_ID_BYTES(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_ID_BLOCKS(A) ((SQUASHFS_ID_BYTES(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ ++ sizeof(long long)) ++ ++/* cached data constants for filesystem */ ++#define SQUASHFS_CACHED_BLKS 8 ++ ++#define SQUASHFS_MAX_FILE_SIZE_LOG 64 ++ ++#define SQUASHFS_MAX_FILE_SIZE ((long long) 1 << \ ++ (SQUASHFS_MAX_FILE_SIZE_LOG - 2)) ++ ++#define SQUASHFS_MARKER_BYTE 0xff ++ ++/* meta index cache */ ++#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) ++#define SQUASHFS_META_ENTRIES 31 ++#define SQUASHFS_META_NUMBER 8 ++#define SQUASHFS_SLOTS 4 ++ ++struct meta_entry { ++ long long data_block; ++ unsigned int index_block; ++ unsigned short offset; ++ unsigned short pad; ++}; ++ ++struct meta_index { ++ unsigned int inode_number; ++ unsigned int offset; ++ unsigned short entries; ++ unsigned short skip; ++ unsigned short locked; ++ unsigned short pad; ++ struct meta_entry meta_entry[SQUASHFS_META_ENTRIES]; ++}; ++ ++ ++/* ++ * definitions for structures on disk ++ */ ++ ++typedef long long squashfs_block_t; ++typedef long long squashfs_inode_t; ++ ++#define COMPRESSION_ZLIB 1 ++ ++struct squashfs_super_block { ++ unsigned int s_magic; ++ unsigned int inodes; ++ unsigned int mkfs_time /* time of filesystem creation */; ++ unsigned int block_size; ++ unsigned int fragments; ++ unsigned short compression; ++ unsigned short block_log; ++ unsigned short flags; ++ unsigned short no_ids; ++ unsigned short s_major; ++ unsigned short s_minor; ++ squashfs_inode_t root_inode; ++ long long bytes_used; ++ long long id_table_start; ++ long long xattr_table_start; ++ long long inode_table_start; ++ long long directory_table_start; ++ long long fragment_table_start; ++ long long lookup_table_start; ++}; ++ ++struct squashfs_dir_index { ++ unsigned int index; ++ unsigned int start_block; ++ unsigned int size; ++ unsigned char name[0]; ++}; ++ ++#define SQUASHFS_BASE_INODE_HEADER \ ++ unsigned short inode_type; \ ++ unsigned short mode; \ ++ unsigned short uid; \ ++ unsigned short guid; \ ++ unsigned int mtime; \ ++ unsigned int inode_number; ++ ++struct squashfs_base_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++}; ++ ++struct squashfs_ipc_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++}; ++ ++struct squashfs_dev_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned int rdev; ++}; ++ ++struct squashfs_symlink_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned int symlink_size; ++ char symlink[0]; ++}; ++ ++struct squashfs_reg_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int file_size; ++ unsigned short block_list[0]; ++}; ++ ++struct squashfs_lreg_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ squashfs_block_t start_block; ++ long long file_size; ++ long long sparse; ++ unsigned int nlink; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int xattr; ++ unsigned short block_list[0]; ++}; ++ ++struct squashfs_dir_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int start_block; ++ unsigned int nlink; ++ unsigned short file_size; ++ unsigned short offset; ++ unsigned int parent_inode; ++}; ++ ++struct squashfs_ldir_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned int file_size; ++ unsigned int start_block; ++ unsigned int parent_inode; ++ unsigned short i_count; ++ unsigned short offset; ++ struct squashfs_dir_index index[0]; ++}; ++ ++union squashfs_inode_header { ++ struct squashfs_base_inode_header base; ++ struct squashfs_dev_inode_header dev; ++ struct squashfs_symlink_inode_header symlink; ++ struct squashfs_reg_inode_header reg; ++ struct squashfs_lreg_inode_header lreg; ++ struct squashfs_dir_inode_header dir; ++ struct squashfs_ldir_inode_header ldir; ++ struct squashfs_ipc_inode_header ipc; ++}; ++ ++struct squashfs_dir_entry { ++ unsigned short offset; ++ short inode_number; ++ unsigned short type; ++ unsigned short size; ++ char name[0]; ++}; ++ ++struct squashfs_dir_header { ++ unsigned int count; ++ unsigned int start_block; ++ unsigned int inode_number; ++}; ++ ++struct squashfs_fragment_entry { ++ long long start_block; ++ unsigned int size; ++ unsigned int unused; ++}; ++ ++extern int squashfs_uncompress_block(void *d, int dstlen, void *s, int srclen); ++extern int squashfs_uncompress_init(void); ++extern int squashfs_uncompress_exit(void); ++ ++/* ++ * macros to convert each packed bitfield structure from little endian to big ++ * endian and vice versa. These are needed when creating or using a filesystem ++ * on a machine with different byte ordering to the target architecture. ++ * ++ */ ++ ++#define SQUASHFS_SWAP_START \ ++ int bits;\ ++ int b_pos;\ ++ unsigned long long val;\ ++ unsigned char *s;\ ++ unsigned char *d; ++ ++#define SQUASHFS_SWAP_SUPER_BLOCK(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_super_block));\ ++ SQUASHFS_SWAP((s)->s_magic, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->inodes, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->bytes_used_2, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->uid_start_2, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->guid_start_2, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->inode_table_start_2, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->directory_table_start_2, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->s_major, d, 224, 16);\ ++ SQUASHFS_SWAP((s)->s_minor, d, 240, 16);\ ++ SQUASHFS_SWAP((s)->block_size_1, d, 256, 16);\ ++ SQUASHFS_SWAP((s)->block_log, d, 272, 16);\ ++ SQUASHFS_SWAP((s)->flags, d, 288, 8);\ ++ SQUASHFS_SWAP((s)->no_uids, d, 296, 8);\ ++ SQUASHFS_SWAP((s)->no_guids, d, 304, 8);\ ++ SQUASHFS_SWAP((s)->mkfs_time, d, 312, 32);\ ++ SQUASHFS_SWAP((s)->root_inode, d, 344, 64);\ ++ SQUASHFS_SWAP((s)->block_size, d, 408, 32);\ ++ SQUASHFS_SWAP((s)->fragments, d, 440, 32);\ ++ SQUASHFS_SWAP((s)->fragment_table_start_2, d, 472, 32);\ ++ SQUASHFS_SWAP((s)->bytes_used, d, 504, 64);\ ++ SQUASHFS_SWAP((s)->uid_start, d, 568, 64);\ ++ SQUASHFS_SWAP((s)->guid_start, d, 632, 64);\ ++ SQUASHFS_SWAP((s)->inode_table_start, d, 696, 64);\ ++ SQUASHFS_SWAP((s)->directory_table_start, d, 760, 64);\ ++ SQUASHFS_SWAP((s)->fragment_table_start, d, 824, 64);\ ++ SQUASHFS_SWAP((s)->lookup_table_start, d, 888, 64);\ ++} ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->guid, d, 24, 8);\ ++ SQUASHFS_SWAP((s)->mtime, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 64, 32); ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_ipc_inode_header))\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_dev_inode_header)); \ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->rdev, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_reg_inode_header));\ ++ SQUASHFS_SWAP((s)->start_block, d, 96, 64);\ ++ SQUASHFS_SWAP((s)->fragment, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 224, 32);\ ++} ++ ++#define SQUASHFS_SWAP_LREG_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_lreg_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 128, 64);\ ++ SQUASHFS_SWAP((s)->fragment, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 224, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 256, 64);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_dir_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 128, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 147, 13);\ ++ SQUASHFS_SWAP((s)->start_block, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->parent_inode, d, 192, 32);\ ++} ++ ++#define SQUASHFS_SWAP_LDIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_ldir_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 128, 27);\ ++ SQUASHFS_SWAP((s)->offset, d, 155, 13);\ ++ SQUASHFS_SWAP((s)->start_block, d, 168, 32);\ ++ SQUASHFS_SWAP((s)->i_count, d, 200, 16);\ ++ SQUASHFS_SWAP((s)->parent_inode, d, 216, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INDEX(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index));\ ++ SQUASHFS_SWAP((s)->index, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->size, d, 64, 8);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header));\ ++ SQUASHFS_SWAP((s)->count, d, 0, 8);\ ++ SQUASHFS_SWAP((s)->start_block, d, 8, 32);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 40, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_ENTRY(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry));\ ++ SQUASHFS_SWAP((s)->offset, d, 0, 13);\ ++ SQUASHFS_SWAP((s)->type, d, 13, 3);\ ++ SQUASHFS_SWAP((s)->size, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_ENTRY(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry));\ ++ SQUASHFS_SWAP((s)->start_block, d, 0, 64);\ ++ SQUASHFS_SWAP((s)->size, d, 64, 32);\ ++} ++ ++#define SQUASHFS_SWAP_INODE_T(s, d) SQUASHFS_SWAP_LONG_LONGS(s, d, 1) ++ ++#define SQUASHFS_SWAP_SHORTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 2);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 16)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 16);\ ++} ++ ++#define SQUASHFS_SWAP_INTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 4);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 32)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 32);\ ++} ++ ++#define SQUASHFS_SWAP_LONG_LONGS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 8);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 64)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 64);\ ++} ++ ++#define SQUASHFS_SWAP_DATA(s, d, n, bits) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * bits / 8);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ bits)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, bits);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_INDEXES(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n) ++#define SQUASHFS_SWAP_LOOKUP_BLOCKS(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n) ++#define SQUASHFS_SWAP_ID_BLOCKS(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n) ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++ ++struct squashfs_base_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_ipc_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int type:4; ++ unsigned int offset:4; ++} __attribute__ ((packed)); ++ ++struct squashfs_dev_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)); ++ ++struct squashfs_symlink_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_reg_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int mtime; ++ unsigned int start_block; ++ unsigned int file_size:32; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++union squashfs_inode_header_1 { ++ struct squashfs_base_inode_header_1 base; ++ struct squashfs_dev_inode_header_1 dev; ++ struct squashfs_symlink_inode_header_1 symlink; ++ struct squashfs_reg_inode_header_1 reg; ++ struct squashfs_dir_inode_header_1 dir; ++ struct squashfs_ipc_inode_header_1 ipc; ++}; ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n) \ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 4);\ ++ SQUASHFS_SWAP((s)->guid, d, 20, 4); ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_ipc_inode_header_1));\ ++ SQUASHFS_SWAP((s)->type, d, 24, 4);\ ++ SQUASHFS_SWAP((s)->offset, d, 28, 4);\ ++} ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_dev_inode_header_1));\ ++ SQUASHFS_SWAP((s)->rdev, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header_1));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_reg_inode_header_1));\ ++ SQUASHFS_SWAP((s)->mtime, d, 24, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 88, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_dir_inode_header_1));\ ++ SQUASHFS_SWAP((s)->file_size, d, 24, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 43, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 88, 24);\ ++} ++ ++#endif ++ ++#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++ ++struct squashfs_dir_index_2 { ++ unsigned int index:27; ++ unsigned int start_block:29; ++ unsigned char size; ++ unsigned char name[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_base_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_ipc_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_dev_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)); ++ ++struct squashfs_symlink_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_reg_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int mtime; ++ unsigned int start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int file_size:32; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++struct squashfs_ldir_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:27; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++ unsigned int i_count:16; ++ struct squashfs_dir_index_2 index[0]; ++} __attribute__ ((packed)); ++ ++union squashfs_inode_header_2 { ++ struct squashfs_base_inode_header_2 base; ++ struct squashfs_dev_inode_header_2 dev; ++ struct squashfs_symlink_inode_header_2 symlink; ++ struct squashfs_reg_inode_header_2 reg; ++ struct squashfs_dir_inode_header_2 dir; ++ struct squashfs_ldir_inode_header_2 ldir; ++ struct squashfs_ipc_inode_header_2 ipc; ++}; ++ ++struct squashfs_dir_header_2 { ++ unsigned int count:8; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_entry_2 { ++ unsigned int offset:13; ++ unsigned int type:3; ++ unsigned int size:8; ++ char name[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_fragment_entry_2 { ++ unsigned int start_block; ++ unsigned int size; ++} __attribute__ ((packed)); ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->guid, d, 24, 8);\ ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER_2(s, d) \ ++ SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, sizeof(struct squashfs_ipc_inode_header_2)) ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_dev_inode_header_2)); \ ++ SQUASHFS_SWAP((s)->rdev, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header_2));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_reg_inode_header_2));\ ++ SQUASHFS_SWAP((s)->mtime, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->fragment, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 160, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_dir_inode_header_2));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 51, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 96, 24);\ ++} ++ ++#define SQUASHFS_SWAP_LDIR_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_ldir_inode_header_2));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 27);\ ++ SQUASHFS_SWAP((s)->offset, d, 59, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 72, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 104, 24);\ ++ SQUASHFS_SWAP((s)->i_count, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INDEX_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index_2));\ ++ SQUASHFS_SWAP((s)->index, d, 0, 27);\ ++ SQUASHFS_SWAP((s)->start_block, d, 27, 29);\ ++ SQUASHFS_SWAP((s)->size, d, 56, 8);\ ++} ++#define SQUASHFS_SWAP_DIR_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header_2));\ ++ SQUASHFS_SWAP((s)->count, d, 0, 8);\ ++ SQUASHFS_SWAP((s)->start_block, d, 8, 24);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_ENTRY_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry_2));\ ++ SQUASHFS_SWAP((s)->offset, d, 0, 13);\ ++ SQUASHFS_SWAP((s)->type, d, 13, 3);\ ++ SQUASHFS_SWAP((s)->size, d, 16, 8);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_ENTRY_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry_2));\ ++ SQUASHFS_SWAP((s)->start_block, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->size, d, 32, 32);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_INDEXES_2(s, d, n) SQUASHFS_SWAP_INTS(s, d, n) ++ ++/* fragment and fragment table defines */ ++#define SQUASHFS_FRAGMENT_BYTES_2(A) (A * sizeof(struct squashfs_fragment_entry_2)) ++ ++#define SQUASHFS_FRAGMENT_INDEX_2(A) (SQUASHFS_FRAGMENT_BYTES_2(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_OFFSET_2(A) (SQUASHFS_FRAGMENT_BYTES_2(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEXES_2(A) ((SQUASHFS_FRAGMENT_BYTES_2(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_BYTES_2(A) (SQUASHFS_FRAGMENT_INDEXES_2(A) *\ ++ sizeof(int)) ++ ++#endif ++ ++#ifdef __KERNEL__ ++ ++/* ++ * macros used to swap each structure entry, taking into account ++ * bitfields and different bitfield placing conventions on differing ++ * architectures ++ */ ++ ++#include <asm/byteorder.h> ++ ++#ifdef __BIG_ENDIAN ++ /* convert from little endian to big endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \ ++ tbits, b_pos) ++#else ++ /* convert from big endian to little endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \ ++ tbits, 64 - tbits - b_pos) ++#endif ++ ++#define _SQUASHFS_SWAP(value, p, pos, tbits, SHIFT) {\ ++ b_pos = pos % 8;\ ++ val = 0;\ ++ s = (unsigned char *)p + (pos / 8);\ ++ d = ((unsigned char *) &val) + 7;\ ++ for(bits = 0; bits < (tbits + b_pos); bits += 8) \ ++ *d-- = *s++;\ ++ value = (val >> (SHIFT))/* & ((1 << tbits) - 1)*/;\ ++} ++ ++#define SQUASHFS_MEMSET(s, d, n) memset(s, 0, n); ++ ++#endif ++#endif +diff -uNr a/include/linux/squashfs_fs_i.h b/include/linux/squashfs_fs_i.h +--- a/include/linux/squashfs_fs_i.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/squashfs_fs_i.h 2008-08-13 16:16:05.000000000 -0700 +@@ -0,0 +1,45 @@ ++#ifndef SQUASHFS_FS_I ++#define SQUASHFS_FS_I ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_i.h ++ */ ++ ++struct squashfs_inode_info { ++ long long start_block; ++ unsigned int offset; ++ union { ++ struct { ++ long long fragment_start_block; ++ unsigned int fragment_size; ++ unsigned int fragment_offset; ++ long long block_list_start; ++ } s1; ++ struct { ++ long long directory_index_start; ++ unsigned int directory_index_offset; ++ unsigned int directory_index_count; ++ unsigned int parent_inode; ++ } s2; ++ } u; ++ struct inode vfs_inode; ++}; ++#endif +diff -uNr a/include/linux/squashfs_fs_sb.h b/include/linux/squashfs_fs_sb.h +--- a/include/linux/squashfs_fs_sb.h 1969-12-31 16:00:00.000000000 -0800 ++++ b/include/linux/squashfs_fs_sb.h 2008-08-13 16:16:05.000000000 -0700 +@@ -0,0 +1,78 @@ ++#ifndef SQUASHFS_FS_SB ++#define SQUASHFS_FS_SB ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 ++ * Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_sb.h ++ */ ++ ++#include <linux/squashfs_fs.h> ++ ++struct squashfs_cache_entry { ++ long long block; ++ int length; ++ int locked; ++ long long next_index; ++ char pending; ++ char error; ++ int waiting; ++ wait_queue_head_t wait_queue; ++ char *data; ++}; ++ ++struct squashfs_cache { ++ char *name; ++ int entries; ++ int block_size; ++ int next_blk; ++ int waiting; ++ int unused_blks; ++ int use_vmalloc; ++ spinlock_t lock; ++ wait_queue_head_t wait_queue; ++ struct squashfs_cache_entry entry[0]; ++}; ++ ++struct squashfs_sb_info { ++ struct squashfs_super_block sblk; ++ int devblksize; ++ int devblksize_log2; ++ int swap; ++ struct squashfs_cache *block_cache; ++ struct squashfs_cache *fragment_cache; ++ int next_meta_index; ++ unsigned int *id_table; ++ long long *fragment_index; ++ unsigned int *fragment_index_2; ++ char *read_page; ++ struct mutex read_data_mutex; ++ struct mutex read_page_mutex; ++ struct mutex meta_index_mutex; ++ struct meta_index *meta_index; ++ z_stream stream; ++ long long *inode_lookup_table; ++ int (*read_inode)(struct inode *i, squashfs_inode_t \ ++ inode); ++ long long (*read_blocklist)(struct inode *inode, int \ ++ index, int readahead_blks, char *block_list, \ ++ unsigned short **block_p, unsigned int *bsize); ++ int (*read_fragment_index_table)(struct super_block *s); ++}; ++#endif diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0010_unionfs-2.4_for_2.6.27-rc1.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0010_unionfs-2.4_for_2.6.27-rc1.patch new file mode 100644 index 000000000..8a2e83aa6 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0010_unionfs-2.4_for_2.6.27-rc1.patch @@ -0,0 +1,11320 @@ +diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX +index 52cd611..bc6b437 100644 +--- a/Documentation/filesystems/00-INDEX ++++ b/Documentation/filesystems/00-INDEX +@@ -106,6 +106,8 @@ udf.txt + - info and mount options for the UDF filesystem. + ufs.txt + - info on the ufs filesystem. ++unionfs/ ++ - info on the unionfs filesystem + vfat.txt + - info on using the VFAT filesystem used in Windows NT and Windows 95 + vfs.txt +diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX +new file mode 100644 +index 0000000..96fdf67 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/00-INDEX +@@ -0,0 +1,10 @@ ++00-INDEX ++ - this file. ++concepts.txt ++ - A brief introduction of concepts. ++issues.txt ++ - A summary of known issues with unionfs. ++rename.txt ++ - Information regarding rename operations. ++usage.txt ++ - Usage information and examples. +diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt +new file mode 100644 +index 0000000..b853788 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/concepts.txt +@@ -0,0 +1,287 @@ ++Unionfs 2.x CONCEPTS: ++===================== ++ ++This file describes the concepts needed by a namespace unification file ++system. ++ ++ ++Branch Priority: ++================ ++ ++Each branch is assigned a unique priority - starting from 0 (highest ++priority). No two branches can have the same priority. ++ ++ ++Branch Mode: ++============ ++ ++Each branch is assigned a mode - read-write or read-only. This allows ++directories on media mounted read-write to be used in a read-only manner. ++ ++ ++Whiteouts: ++========== ++ ++A whiteout removes a file name from the namespace. Whiteouts are needed when ++one attempts to remove a file on a read-only branch. ++ ++Suppose we have a two-branch union, where branch 0 is read-write and branch ++1 is read-only. And a file 'foo' on branch 1: ++ ++./b0/ ++./b1/ ++./b1/foo ++ ++The unified view would simply be: ++ ++./union/ ++./union/foo ++ ++Since 'foo' is stored on a read-only branch, it cannot be removed. A ++whiteout is used to remove the name 'foo' from the unified namespace. Again, ++since branch 1 is read-only, the whiteout cannot be created there. So, we ++try on a higher priority (lower numerically) branch and create the whiteout ++there. ++ ++./b0/ ++./b0/.wh.foo ++./b1/ ++./b1/foo ++ ++Later, when Unionfs traverses branches (due to lookup or readdir), it ++eliminate 'foo' from the namespace (as well as the whiteout itself.) ++ ++ ++Opaque Directories: ++=================== ++ ++Assume we have a unionfs mount comprising of two branches. Branch 0 is ++empty; branch 1 has the directory /a and file /a/f. Let's say we mount a ++union of branch 0 as read-write and branch 1 as read-only. Now, let's say ++we try to perform the following operation in the union: ++ ++ rm -fr a ++ ++Because branch 1 is not writable, we cannot physically remove the file /a/f ++or the directory /a. So instead, we will create a whiteout in branch 0 ++named /.wh.a, masking out the name "a" from branch 1. Next, let's say we ++try to create a directory named "a" as follows: ++ ++ mkdir a ++ ++Because we have a whiteout for "a" already, Unionfs behaves as if "a" ++doesn't exist, and thus will delete the whiteout and replace it with an ++actual directory named "a". ++ ++The problem now is that if you try to "ls" in the union, Unionfs will ++perform is normal directory name unification, for *all* directories named ++"a" in all branches. This will cause the file /a/f from branch 1 to ++re-appear in the union's namespace, which violates Unix semantics. ++ ++To avoid this problem, we have a different form of whiteouts for ++directories, called "opaque directories" (same as BSD Union Mount does). ++Whenever we replace a whiteout with a directory, that directory is marked as ++opaque. In Unionfs 2.x, it means that we create a file named ++/a/.wh.__dir_opaque in branch 0, after having created directory /a there. ++When unionfs notices that a directory is opaque, it stops all namespace ++operations (including merging readdir contents) at that opaque directory. ++This prevents re-exposing names from masked out directories. ++ ++ ++Duplicate Elimination: ++====================== ++ ++It is possible for files on different branches to have the same name. ++Unionfs then has to select which instance of the file to show to the user. ++Given the fact that each branch has a priority associated with it, the ++simplest solution is to take the instance from the highest priority ++(numerically lowest value) and "hide" the others. ++ ++ ++Unlinking: ++========= ++ ++Unlink operation on non-directory instances is optimized to remove the ++maximum possible objects in case multiple underlying branches have the same ++file name. The unlink operation will first try to delete file instances ++from highest priority branch and then move further to delete from remaining ++branches in order of their decreasing priority. Consider a case (F..D..F), ++where F is a file and D is a directory of the same name; here, some ++intermediate branch could have an empty directory instance with the same ++name, so this operation also tries to delete this directory instance and ++proceed further to delete from next possible lower priority branch. The ++unionfs unlink operation will smoothly delete the files with same name from ++all possible underlying branches. In case if some error occurs, it creates ++whiteout in highest priority branch that will hide file instance in rest of ++the branches. An error could occur either if an unlink operations in any of ++the underlying branch failed or if a branch has no write permission. ++ ++This unlinking policy is known as "delete all" and it has the benefit of ++overall reducing the number of inodes used by duplicate files, and further ++reducing the total number of inodes consumed by whiteouts. The cost is of ++extra processing, but testing shows this extra processing is well worth the ++savings. ++ ++ ++Copyup: ++======= ++ ++When a change is made to the contents of a file's data or meta-data, they ++have to be stored somewhere. The best way is to create a copy of the ++original file on a branch that is writable, and then redirect the write ++though to this copy. The copy must be made on a higher priority branch so ++that lookup and readdir return this newer "version" of the file rather than ++the original (see duplicate elimination). ++ ++An entire unionfs mount can be read-only or read-write. If it's read-only, ++then none of the branches will be written to, even if some of the branches ++are physically writeable. If the unionfs mount is read-write, then the ++leftmost (highest priority) branch must be writeable (for copyup to take ++place); the remaining branches can be any mix of read-write and read-only. ++ ++In a writeable mount, unionfs will create new files/dir in the leftmost ++branch. If one tries to modify a file in a read-only branch/media, unionfs ++will copyup the file to the leftmost branch and modify it there. If you try ++to modify a file from a writeable branch which is not the leftmost branch, ++then unionfs will modify it in that branch; this is useful if you, say, ++unify differnet packages (e.g., apache, sendmail, ftpd, etc.) and you want ++changes to specific package files to remain logically in the directory where ++they came from. ++ ++Cache Coherency: ++================ ++ ++Unionfs users often want to be able to modify files and directories directly ++on the lower branches, and have those changes be visible at the Unionfs ++level. This means that data (e.g., pages) and meta-data (dentries, inodes, ++open files, etc.) have to be synchronized between the upper and lower ++layers. In other words, the newest changes from a layer below have to be ++propagated to the Unionfs layer above. If the two layers are not in sync, a ++cache incoherency ensues, which could lead to application failures and even ++oopses. The Linux kernel, however, has a rather limited set of mechanisms ++to ensure this inter-layer cache coherency---so Unionfs has to do most of ++the hard work on its own. ++ ++Maintaining Invariants: ++ ++The way Unionfs ensures cache coherency is as follows. At each entry point ++to a Unionfs file system method, we call a utility function to validate the ++primary objects of this method. Generally, we call unionfs_file_revalidate ++on open files, and __unionfs_d_revalidate_chain on dentries (which also ++validates inodes). These utility functions check to see whether the upper ++Unionfs object is in sync with any of the lower objects that it represents. ++The checks we perform include whether the Unionfs superblock has a newer ++generation number, or if any of the lower objects mtime's or ctime's are ++newer. (Note: generation numbers change when branch-management commands are ++issued, so in a way, maintaining cache coherency is also very important for ++branch-management.) If indeed we determine that any Unionfs object is no ++longer in sync with its lower counterparts, then we rebuild that object ++similarly to how we do so for branch-management. ++ ++While rebuilding Unionfs's objects, we also purge any page mappings and ++truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to ++ensure that Unionfs will re-get the newer data from the lower branches. We ++perform this purging only if the Unionfs operation in question is a reading ++operation; if Unionfs is performing a data writing operation (e.g., ->write, ++->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is ++because (1) a self-deadlock could occur and (2) the upper Unionfs pages are ++considered more authoritative anyway, as they are newer and will overwrite ++any lower pages. ++ ++Unionfs maintains the following important invariant regarding mtime's, ++ctime's, and atime's: the upper inode object's times are the max() of all of ++the lower ones. For non-directory objects, there's only one object below, ++so the mapping is simple; for directory objects, there could me multiple ++lower objects and we have to sync up with the newest one of all the lower ++ones. This invariant is important to maintain, especially for directories ++(besides, we need this to be POSIX compliant). A union could comprise ++multiple writable branches, each of which could change. If we don't reflect ++the newest possible mtime/ctime, some applications could fail. For example, ++NFSv2/v3 exports check for newer directory mtimes on the server to determine ++if the client-side attribute cache should be purged. ++ ++To maintain these important invariants, of course, Unionfs carefully ++synchronizes upper and lower times in various places. For example, if we ++copy-up a file to a top-level branch, the parent directory where the file ++was copied up to will now have a new mtime: so after a successful copy-up, ++we sync up with the new top-level branch's parent directory mtime. ++ ++Implementation: ++ ++This cache-coherency implementation is efficient because it defers any ++synchronizing between the upper and lower layers until absolutely needed. ++Consider the example a common situation where users perform a lot of lower ++changes, such as untarring a whole package. While these take place, ++typically the user doesn't access the files via Unionfs; only after the ++lower changes are done, does the user try to access the lower files. With ++our cache-coherency implementation, the entirety of the changes to the lower ++branches will not result in a single CPU cycle spent at the Unionfs level ++until the user invokes a system call that goes through Unionfs. ++ ++We have considered two alternate cache-coherency designs. (1) Using the ++dentry/inode notify functionality to register interest in finding out about ++any lower changes. This is a somewhat limited and also a heavy-handed ++approach which could result in many notifications to the Unionfs layer upon ++each small change at the lower layer (imagine a file being modified multiple ++times in rapid succession). (2) Rewriting the VFS to support explicit ++callbacks from lower objects to upper objects. We began exploring such an ++implementation, but found it to be very complicated--it would have resulted ++in massive VFS/MM changes which are unlikely to be accepted by the LKML ++community. We therefore believe that our current cache-coherency design and ++implementation represent the best approach at this time. ++ ++Limitations: ++ ++Our implementation works in that as long as a user process will have caused ++Unionfs to be called, directly or indirectly, even to just do ++->d_revalidate; then we will have purged the current Unionfs data and the ++process will see the new data. For example, a process that continually ++re-reads the same file's data will see the NEW data as soon as the lower ++file had changed, upon the next read(2) syscall (even if the file is still ++open!) However, this doesn't work when the process re-reads the open file's ++data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens ++it). Once we respond to ->readpage(s), then the kernel maps the page into ++the process's address space and there doesn't appear to be a way to force ++the kernel to invalidate those pages/mappings, and force the process to ++re-issue ->readpage. If there's a way to invalidate active mappings and ++force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do ++the trick). ++ ++Our current Unionfs code has to perform many file-revalidation calls. It ++would be really nice if the VFS would export an optional file system hook ++->file_revalidate (similarly to dentry->d_revalidate) that will be called ++before each VFS op that has a "struct file" in it. ++ ++Certain file systems have micro-second granularity (or better) for inode ++times, and asynchronous actions could cause those times to change with some ++small delay. In such cases, Unionfs may see a changed inode time that only ++differs by a tiny fraction of a second: such a change may be a false ++positive indication that the lower object has changed, whereas if unionfs ++waits a little longer, that false indication will not be seen. (These false ++positives are harmless, because they would at most cause unionfs to ++re-validate an object that may need no revalidation, and print a debugging ++message that clutters the console/logs.) Therefore, to minimize the chances ++of these situations, we delay the detection of changed times by a small ++factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3 ++seconds, as does NFS). This means that we will detect the change, only a ++couple of seconds later, if indeed the time change persists in the lower ++file object. This delayed detection has an added performance benefit: we ++reduce the number of times that unionfs has to revalidate objects, in case ++there's a lot of concurrent activity on both the upper and lower objects, ++for the same file(s). Lastly, this delayed time attribute detection is ++similar to how NFS clients operate (e.g., acregmin). ++ ++Finally, there is no way currently in Linux to prevent lower directories ++from being moved around (i.e., topology changes); there's no way to prevent ++modifications to directory sub-trees of whole file systems which are mounted ++read-write. It is therefore possible for in-flight operations in unionfs to ++take place, while a lower directory is being moved around. Therefore, if ++you try to, say, create a new file in a directory through unionfs, while the ++directory is being moved around directly, then the new file may get created ++in the new location where that directory was moved to. This is a somewhat ++similar behaviour in NFS: an NFS client could be creating a new file while ++th NFS server is moving th directory around; the file will get successfully ++created in the new location. (The one exception in unionfs is that if the ++branch is marked read-only by unionfs, then a copyup will take place.) ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt +new file mode 100644 +index 0000000..f4b7e7e +--- /dev/null ++++ b/Documentation/filesystems/unionfs/issues.txt +@@ -0,0 +1,28 @@ ++KNOWN Unionfs 2.x ISSUES: ++========================= ++ ++1. Unionfs should not use lookup_one_len() on the underlying f/s as it ++ confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the ++ lower file-system, this eliminates part of the problem. The remaining ++ calls to lookup_one_len may need to be changed to pass an intent. We are ++ currently introducing VFS changes to fs/namei.c's do_path_lookup() to ++ allow proper file lookup and opening in stackable file systems. ++ ++2. Lockdep (a debugging feature) isn't aware of stacking, and so it ++ incorrectly complains about locking problems. The problem boils down to ++ this: Lockdep considers all objects of a certain type to be in the same ++ class, for example, all inodes. Lockdep doesn't like to see a lock held ++ on two inodes within the same task, and warns that it could lead to a ++ deadlock. However, stackable file systems do precisely that: they lock ++ an upper object, and then a lower object, in a strict order to avoid ++ locking problems; in addition, Unionfs, as a fan-out file system, may ++ have to lock several lower inodes. We are currently looking into Lockdep ++ to see how to make it aware of stackable file systems. For now, we ++ temporarily disable lockdep when calling vfs methods on lower objects, ++ but only for those places where lockdep complained. While this solution ++ may seem unclean, it is not without precedent: other places in the kernel ++ also do similar temporary disabling, of course after carefully having ++ checked that it is the right thing to do. Anyway, you get any warnings ++ from Lockdep, please report them to the Unionfs maintainers. ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt +new file mode 100644 +index 0000000..e20bb82 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/rename.txt +@@ -0,0 +1,31 @@ ++Rename is a complex beast. The following table shows which rename(2) operations ++should succeed and which should fail. ++ ++o: success ++E: error (either unionfs or vfs) ++X: EXDEV ++ ++none = file does not exist ++file = file is a file ++dir = file is a empty directory ++child= file is a non-empty directory ++wh = file is a directory containing only whiteouts; this makes it logically ++ empty ++ ++ none file dir child wh ++file o o E E E ++dir o E o E o ++child X E X E X ++wh o E o E o ++ ++ ++Renaming directories: ++===================== ++ ++Whenever a empty (either physically or logically) directory is being renamed, ++the following sequence of events should take place: ++ ++1) Remove whiteouts from both source and destination directory ++2) Rename source to destination ++3) Make destination opaque to prevent anything under it from showing up ++ +diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt +new file mode 100644 +index 0000000..1adde69 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/usage.txt +@@ -0,0 +1,134 @@ ++Unionfs is a stackable unification file system, which can appear to merge ++the contents of several directories (branches), while keeping their physical ++content separate. Unionfs is useful for unified source tree management, ++merged contents of split CD-ROM, merged separate software package ++directories, data grids, and more. Unionfs allows any mix of read-only and ++read-write branches, as well as insertion and deletion of branches anywhere ++in the fan-out. To maintain Unix semantics, Unionfs handles elimination of ++duplicates, partial-error conditions, and more. ++ ++GENERAL SYNTAX ++============== ++ ++# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT ++ ++OPTIONS can be any legal combination of: ++ ++- ro # mount file system read-only ++- rw # mount file system read-write ++- remount # remount the file system (see Branch Management below) ++- incgen # increment generation no. (see Cache Consistency below) ++ ++BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs=" ++option, or (2) a list of individual branch manipulation commands, combined ++with the "remount" option, and is further described in the "Branch ++Management" section below. ++ ++The syntax for the "dirs=" mount option is: ++ ++ dirs=branch[=ro|=rw][:...] ++ ++The "dirs=" option takes a colon-delimited list of directories to compose ++the union, with an optional branch mode for each of those directories. ++Directories that come earlier (specified first, on the left) in the list ++have a higher precedence than those which come later. Additionally, ++read-only or read-write permissions of the branch can be specified by ++appending =ro or =rw (default) to each directory. See the Copyup section in ++concepts.txt, for a description of Unionfs's behavior when mixing read-only ++and read-write branches and mounts. ++ ++Syntax: ++ ++ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw] ++ ++Example: ++ ++ dirs=/writable_branch=rw:/read-only_branch=ro ++ ++ ++BRANCH MANAGEMENT ++================= ++ ++Once you mount your union for the first time, using the "dirs=" option, you ++can then change the union's overall mode or reconfigure the branches, using ++the remount option, as follows. ++ ++To downgrade a union from read-write to read-only: ++ ++# mount -t unionfs -o remount,ro none MOUNTPOINT ++ ++To upgrade a union from read-only to read-write: ++ ++# mount -t unionfs -o remount,rw none MOUNTPOINT ++ ++To delete a branch /foo, regardless where it is in the current union: ++ ++# mount -t unionfs -o remount,del=/foo none MOUNTPOINT ++ ++To insert (add) a branch /foo before /bar: ++ ++# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT ++ ++To insert (add) a branch /foo (with the "rw" mode flag) before /bar: ++ ++# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT ++ ++To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a ++new highest-priority branch), you can use the above syntax, or use a short ++hand version as follows: ++ ++# mount -t unionfs -o remount,add=/foo none MOUNTPOINT ++ ++To append a branch to the very end (new lowest-priority branch): ++ ++# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT ++ ++To append a branch to the very end (new lowest-priority branch), in ++read-only mode: ++ ++# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT ++ ++Finally, to change the mode of one existing branch, say /foo, from read-only ++to read-write, and change /bar from read-write to read-only: ++ ++# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT ++ ++Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because ++then Unionfs won't have any writable place for copyups to take place. ++Moreover, the VFS can get confused when it tries to modify something in a ++file system mounted read-write, but isn't permitted to write to it. ++Instead, you should set the whole union as readonly, as described above. ++If, however, you must set the leftmost branch as readonly, perhaps so you ++can get a snapshot of it at a point in time, then you should insert a new ++writable top-level branch, and mark the one you want as readonly. This can ++be accomplished as follows, assuming that /foo is your current leftmost ++branch: ++ ++# mount -t tmpfs -o size=NNN /new ++# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT ++<do what you want safely in /foo> ++# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT ++<check if there's anything in /new you want to preserve> ++# umount /new ++ ++CACHE CONSISTENCY ++================= ++ ++If you modify any file on any of the lower branches directly, while there is ++a Unionfs 2.x mounted above any of those branches, you should tell Unionfs ++to purge its caches and re-get the objects. To do that, you have to ++increment the generation number of the superblock using the following ++command: ++ ++# mount -t unionfs -o remount,incgen none MOUNTPOINT ++ ++Note that the older way of incrementing the generation number using an ++ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general ++are not encouraged. Plus, an ioctl is per-file concept, whereas the ++generation number is a per-file-system concept. Worse, such an ioctl ++requires an open file, which then has to be invalidated by the very nature ++of the generation number increase (read: the old generation increase ioctl ++was pretty racy). ++ ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/MAINTAINERS b/MAINTAINERS +index deedc0d..c722f8e 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -4173,6 +4173,14 @@ L: linux-kernel@vger.kernel.org + W: http://www.kernel.dk + S: Maintained + ++UNIONFS ++P: Erez Zadok ++M: ezk@cs.sunysb.edu ++L: unionfs@filesystems.org ++W: http://unionfs.filesystems.org ++T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git ++S: Maintained ++ + USB ACM DRIVER + P: Oliver Neukum + M: oliver@neukum.name +diff --git a/fs/Kconfig b/fs/Kconfig +index d387358..31610a2 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -981,6 +981,47 @@ config CONFIGFS_FS + + endmenu + ++menu "Layered filesystems" ++ ++config ECRYPT_FS ++ tristate "eCrypt filesystem layer support (EXPERIMENTAL)" ++ depends on EXPERIMENTAL && KEYS && CRYPTO && NET ++ help ++ Encrypted filesystem that operates on the VFS layer. See ++ <file:Documentation/filesystems/ecryptfs.txt> to learn more about ++ eCryptfs. Userspace components are required and can be ++ obtained from <http://ecryptfs.sf.net>. ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ecryptfs. ++ ++config UNION_FS ++ tristate "Union file system (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ Unionfs is a stackable unification file system, which appears to ++ merge the contents of several directories (branches), while keeping ++ their physical content separate. ++ ++ See <http://unionfs.filesystems.org> for details ++ ++config UNION_FS_XATTR ++ bool "Unionfs extended attributes" ++ depends on UNION_FS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page). ++ ++ If unsure, say N. ++ ++config UNION_FS_DEBUG ++ bool "Debug Unionfs" ++ depends on UNION_FS ++ help ++ If you say Y here, you can turn on debugging output from Unionfs. ++ ++endmenu ++ + menu "Miscellaneous filesystems" + + config ADFS_FS +@@ -1033,18 +1074,6 @@ config AFFS_FS + To compile this file system support as a module, choose M here: the + module will be called affs. If unsure, say N. + +-config ECRYPT_FS +- tristate "eCrypt filesystem layer support (EXPERIMENTAL)" +- depends on EXPERIMENTAL && KEYS && CRYPTO && NET +- help +- Encrypted filesystem that operates on the VFS layer. See +- <file:Documentation/filesystems/ecryptfs.txt> to learn more about +- eCryptfs. Userspace components are required and can be +- obtained from <http://ecryptfs.sf.net>. +- +- To compile this file system support as a module, choose M here: the +- module will be called ecryptfs. +- + config HFS_FS + tristate "Apple Macintosh file system support (EXPERIMENTAL)" + depends on BLOCK && EXPERIMENTAL +diff --git a/fs/Makefile b/fs/Makefile +index a1482a5..9bf3915 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -86,6 +86,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ + obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ + obj-$(CONFIG_HFS_FS) += hfs/ + obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ ++obj-$(CONFIG_UNION_FS) += unionfs/ + obj-$(CONFIG_VXFS_FS) += freevxfs/ + obj-$(CONFIG_NFS_FS) += nfs/ + obj-$(CONFIG_EXPORTFS) += exportfs/ +diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c +index 5e59658..4621f89 100644 +--- a/fs/ecryptfs/dentry.c ++++ b/fs/ecryptfs/dentry.c +@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) + struct inode *lower_inode = + ecryptfs_inode_to_lower(dentry->d_inode); + +- fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); ++ fsstack_copy_attr_all(dentry->d_inode, lower_inode); + } + out: + return rc; +diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c +index 89209f0..d99a83e 100644 +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -589,9 +589,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (rc) + goto out_lock; +- fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); ++ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); + if (new_dir != old_dir) +- fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); ++ fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); + out_lock: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_new_dentry->d_parent); +@@ -913,7 +913,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) + rc = notify_change(lower_dentry, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); + out: +- fsstack_copy_attr_all(inode, lower_inode, NULL); ++ fsstack_copy_attr_all(inode, lower_inode); + return rc; + } + +diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c +index 448dfd5..db2db5d 100644 +--- a/fs/ecryptfs/main.c ++++ b/fs/ecryptfs/main.c +@@ -197,7 +197,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, + d_add(dentry, inode); + else + d_instantiate(dentry, inode); +- fsstack_copy_attr_all(inode, lower_inode, NULL); ++ fsstack_copy_attr_all(inode, lower_inode); + /* This size will be overwritten for real files w/ headers and + * other metadata */ + fsstack_copy_inode_size(inode, lower_inode); +diff --git a/fs/namei.c b/fs/namei.c +index a7b0a0b..d05ee31 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -392,6 +392,7 @@ void release_open_intent(struct nameidata *nd) + else + fput(nd->intent.open.file); + } ++EXPORT_SYMBOL_GPL(release_open_intent); + + static inline struct dentry * + do_revalidate(struct dentry *dentry, struct nameidata *nd) +diff --git a/fs/splice.c b/fs/splice.c +index b30311b..204bb3c 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -887,8 +887,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); + /* + * Attempt to initiate a splice from pipe to file. + */ +-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +- loff_t *ppos, size_t len, unsigned int flags) ++long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) + { + int ret; + +@@ -904,13 +904,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + + return out->f_op->splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL_GPL(vfs_splice_from); + + /* + * Attempt to initiate a splice from a file to a pipe. + */ +-static long do_splice_to(struct file *in, loff_t *ppos, +- struct pipe_inode_info *pipe, size_t len, +- unsigned int flags) ++long vfs_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) + { + int ret; + +@@ -926,6 +927,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, + + return in->f_op->splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL_GPL(vfs_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +@@ -995,7 +997,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, + size_t read_len; + loff_t pos = sd->pos, prev_pos = pos; + +- ret = do_splice_to(in, &pos, pipe, len, flags); ++ ret = vfs_splice_to(in, &pos, pipe, len, flags); + if (unlikely(ret <= 0)) + goto out_release; + +@@ -1054,7 +1056,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, + { + struct file *file = sd->u.file; + +- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); ++ return vfs_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); + } + + /** +@@ -1128,7 +1130,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, + } else + off = &out->f_pos; + +- ret = do_splice_from(pipe, out, off, len, flags); ++ ret = vfs_splice_from(pipe, out, off, len, flags); + + if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) + ret = -EFAULT; +@@ -1149,7 +1151,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, + } else + off = &in->f_pos; + +- ret = do_splice_to(in, off, pipe, len, flags); ++ ret = vfs_splice_to(in, off, pipe, len, flags); + + if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) + ret = -EFAULT; +diff --git a/fs/stack.c b/fs/stack.c +index 67716f6..a66ff6c 100644 +--- a/fs/stack.c ++++ b/fs/stack.c +@@ -1,24 +1,82 @@ ++/* ++ * Copyright (c) 2006-2007 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2007 Stony Brook University ++ * Copyright (c) 2006-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #include <linux/module.h> + #include <linux/fs.h> + #include <linux/fs_stack.h> + +-/* does _NOT_ require i_mutex to be held. ++/* ++ * does _NOT_ require i_mutex to be held. + * + * This function cannot be inlined since i_size_{read,write} is rather + * heavy-weight on 32-bit systems + */ +-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) ++void fsstack_copy_inode_size(struct inode *dst, struct inode *src) + { +- i_size_write(dst, i_size_read((struct inode *)src)); +- dst->i_blocks = src->i_blocks; ++ loff_t i_size; ++ blkcnt_t i_blocks; ++ ++ /* ++ * i_size_read() includes its own seqlocking and protection from ++ * preemption (see include/linux/fs.h): we need nothing extra for ++ * that here, and prefer to avoid nesting locks than attempt to ++ * keep i_size and i_blocks in synch together. ++ */ ++ i_size = i_size_read(src); ++ ++ /* ++ * But if CONFIG_LSF (on 32-bit), we ought to make an effort to keep ++ * the two halves of i_blocks in synch despite SMP or PREEMPT - though ++ * stat's generic_fillattr() doesn't bother, and we won't be applying ++ * quotas (where i_blocks does become important) at the upper level. ++ * ++ * We don't actually know what locking is used at the lower level; but ++ * if it's a filesystem that supports quotas, it will be using i_lock ++ * as in inode_add_bytes(). tmpfs uses other locking, and its 32-bit ++ * is (just) able to exceed 2TB i_size with the aid of holes; but its ++ * i_blocks cannot carry into the upper long without almost 2TB swap - ++ * let's ignore that case. ++ */ ++ if (sizeof(i_blocks) > sizeof(long)) ++ spin_lock(&src->i_lock); ++ i_blocks = src->i_blocks; ++ if (sizeof(i_blocks) > sizeof(long)) ++ spin_unlock(&src->i_lock); ++ ++ /* ++ * If CONFIG_SMP on 32-bit, it's vital for fsstack_copy_inode_size() ++ * to hold some lock around i_size_write(), otherwise i_size_read() ++ * may spin forever (see include/linux/fs.h). We don't necessarily ++ * hold i_mutex when this is called, so take i_lock for that case. ++ * ++ * And if CONFIG_LSF (on 32-bit), continue our effort to keep the ++ * two halves of i_blocks in synch despite SMP or PREEMPT: use i_lock ++ * for that case too, and do both at once by combining the tests. ++ * ++ * There is none of this locking overhead in the 64-bit case. ++ */ ++ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long)) ++ spin_lock(&dst->i_lock); ++ i_size_write(dst, i_size); ++ dst->i_blocks = i_blocks; ++ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long)) ++ spin_unlock(&dst->i_lock); + } + EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); + +-/* copy all attributes; get_nlinks is optional way to override the i_nlink ++/* ++ * copy all attributes; get_nlinks is optional way to override the i_nlink + * copying + */ +-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, +- int (*get_nlinks)(struct inode *)) ++void fsstack_copy_attr_all(struct inode *dest, const struct inode *src) + { + dest->i_mode = src->i_mode; + dest->i_uid = src->i_uid; +@@ -29,14 +87,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; +- +- /* +- * Update the nlinks AFTER updating the above fields, because the +- * get_links callback may depend on them. +- */ +- if (!get_nlinks) +- dest->i_nlink = src->i_nlink; +- else +- dest->i_nlink = (*get_nlinks)(dest); ++ dest->i_nlink = src->i_nlink; + } + EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); +diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile +new file mode 100644 +index 0000000..fa04e30 +--- /dev/null ++++ b/fs/unionfs/Makefile +@@ -0,0 +1,17 @@ ++UNIONFS_VERSION="2.4 (for 2.6.27-rc1)" ++ ++EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\" ++ ++obj-$(CONFIG_UNION_FS) += unionfs.o ++ ++unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \ ++ rdstate.o copyup.o dirhelper.o rename.o unlink.o \ ++ lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o ++ ++unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o ++ ++unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o ++ ++ifeq ($(CONFIG_UNION_FS_DEBUG),y) ++EXTRA_CFLAGS += -DDEBUG ++endif +diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c +new file mode 100644 +index 0000000..5861970 +--- /dev/null ++++ b/fs/unionfs/commonfops.c +@@ -0,0 +1,905 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * 1) Copyup the file ++ * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously ++ * stolen from NFS's silly rename ++ */ ++static int copyup_deleted_file(struct file *file, struct dentry *dentry, ++ int bstart, int bindex) ++{ ++ static unsigned int counter; ++ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2; ++ const int countersize = sizeof(counter) * 2; ++ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1; ++ char name[nlen + 1]; ++ int err; ++ struct dentry *tmp_dentry = NULL; ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bstart); ++ ++ sprintf(name, ".unionfs%*.*lx", ++ i_inosize, i_inosize, lower_dentry->d_inode->i_ino); ++ ++ /* ++ * Loop, looking for an unused temp name to copyup to. ++ * ++ * It's somewhat silly that we look for a free temp tmp name in the ++ * source branch (bstart) instead of the dest branch (bindex), where ++ * the final name will be created. We _will_ catch it if somehow ++ * the name exists in the dest branch, but it'd be nice to catch it ++ * sooner than later. ++ */ ++retry: ++ tmp_dentry = NULL; ++ do { ++ char *suffix = name + nlen - countersize; ++ ++ dput(tmp_dentry); ++ counter++; ++ sprintf(suffix, "%*.*x", countersize, countersize, counter); ++ ++ pr_debug("unionfs: trying to rename %s to %s\n", ++ dentry->d_name.name, name); ++ ++ tmp_dentry = lookup_one_len(name, lower_dentry->d_parent, ++ nlen); ++ if (IS_ERR(tmp_dentry)) { ++ err = PTR_ERR(tmp_dentry); ++ goto out; ++ } ++ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */ ++ dput(tmp_dentry); ++ ++ err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart, ++ bindex, ++ i_size_read(file->f_path.dentry->d_inode)); ++ if (err) { ++ if (unlikely(err == -EEXIST)) ++ goto retry; ++ goto out; ++ } ++ ++ /* bring it to the same state as an unlinked file */ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry)); ++ if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) { ++ atomic_inc(&lower_dentry->d_inode->i_count); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ lower_dentry->d_inode); ++ } ++ lower_dir_dentry = lock_parent(lower_dentry); ++ err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); ++ unlock_dir(lower_dir_dentry); ++ ++out: ++ if (!err) ++ unionfs_check_dentry(dentry); ++ return err; ++} ++ ++/* ++ * put all references held by upper struct file and free lower file pointer ++ * array ++ */ ++static void cleanup_file(struct file *file) ++{ ++ int bindex, bstart, bend; ++ struct file **lower_files; ++ struct file *lower_file; ++ struct super_block *sb = file->f_path.dentry->d_sb; ++ ++ lower_files = UNIONFS_F(file)->lower_files; ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ int i; /* holds (possibly) updated branch index */ ++ int old_bid; ++ ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (!lower_file) ++ continue; ++ ++ /* ++ * Find new index of matching branch with an open ++ * file, since branches could have been added or ++ * deleted causing the one with open files to shift. ++ */ ++ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex]; ++ i = branch_id_to_idx(sb, old_bid); ++ if (unlikely(i < 0)) { ++ printk(KERN_ERR "unionfs: no superblock for " ++ "file %p\n", file); ++ continue; ++ } ++ ++ /* decrement count of open files */ ++ branchput(sb, i); ++ /* ++ * fput will perform an mntput for us on the correct branch. ++ * Although we're using the file's old branch configuration, ++ * bindex, which is the old index, correctly points to the ++ * right branch in the file's branch list. In other words, ++ * we're going to mntput the correct branch even if branches ++ * have been added/removed. ++ */ ++ fput(lower_file); ++ UNIONFS_F(file)->lower_files[bindex] = NULL; ++ UNIONFS_F(file)->saved_branch_ids[bindex] = -1; ++ } ++ ++ UNIONFS_F(file)->lower_files = NULL; ++ kfree(lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ /* set to NULL because caller needs to know if to kfree on error */ ++ UNIONFS_F(file)->saved_branch_ids = NULL; ++} ++ ++/* open all lower files for a given file */ ++static int open_all_files(struct file *file) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *lower_file; ++ struct dentry *lower_dentry; ++ struct dentry *dentry = file->f_path.dentry; ++ struct super_block *sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ dget(lower_dentry); ++ unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ ++ lower_file = ++ dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(dentry, bindex), ++ file->f_flags); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ goto out; ++ } else { ++ unionfs_set_lower_file_idx(file, bindex, lower_file); ++ } ++ } ++out: ++ return err; ++} ++ ++/* open the highest priority file for a given upper file */ ++static int open_highest_file(struct file *file, bool willwrite) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *lower_file; ++ struct dentry *lower_dentry; ++ struct dentry *dentry = file->f_path.dentry; ++ struct inode *parent_inode = dentry->d_parent->d_inode; ++ struct super_block *sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) { ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file(parent_inode, file, bstart, bindex, ++ i_size_read(dentry->d_inode)); ++ if (!err) ++ break; ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)-> ++ generation)); ++ goto out; ++ } ++ ++ dget(lower_dentry); ++ unionfs_mntget(dentry, bstart); ++ lower_file = dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(dentry, bstart), ++ file->f_flags); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ goto out; ++ } ++ branchget(sb, bstart); ++ unionfs_set_lower_file(file, lower_file); ++ /* Fix up the position. */ ++ lower_file->f_pos = file->f_pos; ++ ++ memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state)); ++out: ++ return err; ++} ++ ++/* perform a delayed copyup of a read-write file on a read-only branch */ ++static int do_delayed_copyup(struct file *file) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct dentry *dentry = file->f_path.dentry; ++ struct inode *parent_inode = dentry->d_parent->d_inode; ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ BUG_ON(!S_ISREG(dentry->d_inode->i_mode)); ++ ++ unionfs_check_file(file); ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ if (!d_deleted(dentry)) ++ err = copyup_file(parent_inode, file, bstart, ++ bindex, ++ i_size_read(dentry->d_inode)); ++ else ++ err = copyup_deleted_file(file, dentry, bstart, ++ bindex); ++ /* if succeeded, set lower open-file flags and break */ ++ if (!err) { ++ struct file *lower_file; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ lower_file->f_flags = file->f_flags; ++ break; ++ } ++ } ++ if (err || (bstart <= fbstart(file))) ++ goto out; ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (unionfs_lower_file_idx(file, bindex)) { ++ branchput(dentry->d_sb, bindex); ++ fput(unionfs_lower_file_idx(file, bindex)); ++ unionfs_set_lower_file_idx(file, bindex, NULL); ++ } ++ } ++ path_put_lowers(dentry, bstart, bend, false); ++ iput_lowers(dentry->d_inode, bstart, bend, false); ++ /* for reg file, we only open it "once" */ ++ fbend(file) = fbstart(file); ++ dbend(dentry) = dbstart(dentry); ++ ibend(dentry->d_inode) = ibstart(dentry->d_inode); ++ ++out: ++ unionfs_check_file(file); ++ return err; ++} ++ ++/* ++ * Helper function for unionfs_file_revalidate/locked. ++ * Expects dentry/parent to be locked already, and revalidated. ++ */ ++static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry, ++ struct super_block *sb, int sbgen, ++ int dgen, bool willwrite) ++{ ++ int fgen; ++ int bstart, bend, orig_brid; ++ int size; ++ int err = 0; ++ ++ fgen = atomic_read(&UNIONFS_F(file)->generation); ++ ++ /* ++ * There are two cases we are interested in. The first is if the ++ * generation is lower than the super-block. The second is if ++ * someone has copied up this file from underneath us, we also need ++ * to refresh things. ++ */ ++ if (d_deleted(dentry) || ++ (sbgen <= fgen && ++ dbstart(dentry) == fbstart(file) && ++ unionfs_lower_file(file))) ++ goto out_may_copyup; ++ ++ /* save orig branch ID */ ++ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; ++ ++ /* First we throw out the existing files. */ ++ cleanup_file(file); ++ ++ /* Now we reopen the file(s) as in unionfs_open. */ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ size = sizeof(struct file *) * sbmax(sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->lower_files)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ if (S_ISDIR(dentry->d_inode->i_mode)) { ++ /* We need to open all the files. */ ++ err = open_all_files(file); ++ if (err) ++ goto out; ++ } else { ++ int new_brid; ++ /* We only open the highest priority branch. */ ++ err = open_highest_file(file, willwrite); ++ if (err) ++ goto out; ++ new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; ++ if (unlikely(new_brid != orig_brid && sbgen > fgen)) { ++ /* ++ * If we re-opened the file on a different branch ++ * than the original one, and this was due to a new ++ * branch inserted, then update the mnt counts of ++ * the old and new branches accordingly. ++ */ ++ unionfs_mntget(dentry, bstart); ++ unionfs_mntput(sb->s_root, ++ branch_id_to_idx(sb, orig_brid)); ++ } ++ /* regular files have only one open lower file */ ++ fbend(file) = fbstart(file); ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)->generation)); ++ ++out_may_copyup: ++ /* Copyup on the first write to a file on a readonly branch. */ ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && ++ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) && ++ is_robranch(dentry)) { ++ pr_debug("unionfs: do delay copyup of \"%s\"\n", ++ dentry->d_name.name); ++ err = do_delayed_copyup(file); ++ /* regular files have only one open lower file */ ++ if (!err && !S_ISDIR(dentry->d_inode->i_mode)) ++ fbend(file) = fbstart(file); ++ } ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ } else { ++ unionfs_check_file(file); ++ } ++ return err; ++} ++ ++/* ++ * Revalidate the struct file ++ * @file: file to revalidate ++ * @willwrite: true if caller may cause changes to the file; false otherwise. ++ * Caller must lock/unlock dentry's branch configuration. ++ */ ++int unionfs_file_revalidate(struct file *file, bool willwrite) ++{ ++ struct super_block *sb; ++ struct dentry *dentry; ++ int sbgen, dgen; ++ int err = 0; ++ ++ dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ verify_locked(dentry); ++ ++ /* ++ * First revalidate the dentry inside struct file, ++ * but not unhashed dentries. ++ */ ++reval_dentry: ++ if (!d_deleted(dentry) && ++ !__unionfs_d_revalidate_chain(dentry, NULL, willwrite)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ sbgen = atomic_read(&UNIONFS_SB(sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ ++ if (unlikely(sbgen > dgen)) { ++ pr_debug("unionfs: retry dentry %s revalidation\n", ++ dentry->d_name.name); ++ schedule(); ++ goto reval_dentry; ++ } ++ BUG_ON(sbgen > dgen); ++ ++ err = __unionfs_file_revalidate(file, dentry, sb, ++ sbgen, dgen, willwrite); ++out: ++ return err; ++} ++ ++/* same as unionfs_file_revalidate, but parent dentry must be locked too */ ++int unionfs_file_revalidate_locked(struct file *file, bool willwrite) ++{ ++ struct super_block *sb; ++ struct dentry *dentry; ++ int sbgen, dgen; ++ int err = 0, valid; ++ ++ dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ verify_locked(dentry); ++ verify_locked(dentry->d_parent); ++ ++ /* first revalidate (locked) parent, then child */ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ ++reval_dentry: ++ if (!d_deleted(dentry) && ++ !__unionfs_d_revalidate_one_locked(dentry, NULL, willwrite)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ sbgen = atomic_read(&UNIONFS_SB(sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ ++ if (unlikely(sbgen > dgen)) { ++ pr_debug("unionfs: retry (locked) dentry %s revalidation\n", ++ dentry->d_name.name); ++ schedule(); ++ goto reval_dentry; ++ } ++ BUG_ON(sbgen > dgen); ++ ++ err = __unionfs_file_revalidate(file, dentry, sb, ++ sbgen, dgen, willwrite); ++out: ++ return err; ++} ++ ++/* unionfs_open helper function: open a directory */ ++static int __open_dir(struct inode *inode, struct file *file) ++{ ++ struct dentry *lower_dentry; ++ struct file *lower_file; ++ int bindex, bstart, bend; ++ struct vfsmount *mnt; ++ ++ bstart = fbstart(file) = dbstart(file->f_path.dentry); ++ bend = fbend(file) = dbend(file->f_path.dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = ++ unionfs_lower_dentry_idx(file->f_path.dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ dget(lower_dentry); ++ unionfs_mntget(file->f_path.dentry, bindex); ++ mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex); ++ lower_file = dentry_open(lower_dentry, mnt, file->f_flags); ++ if (IS_ERR(lower_file)) ++ return PTR_ERR(lower_file); ++ ++ unionfs_set_lower_file_idx(file, bindex, lower_file); ++ ++ /* ++ * The branchget goes after the open, because otherwise ++ * we would miss the reference on release. ++ */ ++ branchget(inode->i_sb, bindex); ++ } ++ ++ return 0; ++} ++ ++/* unionfs_open helper function: open a file */ ++static int __open_file(struct inode *inode, struct file *file) ++{ ++ struct dentry *lower_dentry; ++ struct file *lower_file; ++ int lower_flags; ++ int bindex, bstart, bend; ++ ++ lower_dentry = unionfs_lower_dentry(file->f_path.dentry); ++ lower_flags = file->f_flags; ++ ++ bstart = fbstart(file) = dbstart(file->f_path.dentry); ++ bend = fbend(file) = dbend(file->f_path.dentry); ++ ++ /* ++ * check for the permission for lower file. If the error is ++ * COPYUP_ERR, copyup the file. ++ */ ++ if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) { ++ /* ++ * if the open will change the file, copy it up otherwise ++ * defer it. ++ */ ++ if (lower_flags & O_TRUNC) { ++ int size = 0; ++ int err = -EROFS; ++ ++ /* copyup the file */ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file( ++ file->f_path.dentry->d_parent->d_inode, ++ file, bstart, bindex, size); ++ if (!err) ++ break; ++ } ++ return err; ++ } else { ++ /* ++ * turn off writeable flags, to force delayed copyup ++ * by caller. ++ */ ++ lower_flags &= ~(OPEN_WRITE_FLAGS); ++ } ++ } ++ ++ dget(lower_dentry); ++ ++ /* ++ * dentry_open will decrement mnt refcnt if err. ++ * otherwise fput() will do an mntput() for us upon file close. ++ */ ++ unionfs_mntget(file->f_path.dentry, bstart); ++ lower_file = ++ dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(file->f_path.dentry, bstart), ++ lower_flags); ++ if (IS_ERR(lower_file)) ++ return PTR_ERR(lower_file); ++ ++ unionfs_set_lower_file(file, lower_file); ++ branchget(inode->i_sb, bstart); ++ ++ return 0; ++} ++ ++int unionfs_open(struct inode *inode, struct file *file) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ int bindex = 0, bstart = 0, bend = 0; ++ int size; ++ int valid = 0; ++ ++ unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ if (dentry != dentry->d_parent) ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out_nofree; ++ } ++ ++ file->private_data = ++ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file))) { ++ err = -ENOMEM; ++ goto out_nofree; ++ } ++ fbstart(file) = -1; ++ fbend(file) = -1; ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(inode)->generation)); ++ ++ size = sizeof(struct file *) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->lower_files)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ /* ++ * open all directories and make the unionfs file struct point to ++ * these lower file structs ++ */ ++ if (S_ISDIR(inode->i_mode)) ++ err = __open_dir(inode, file); /* open a dir */ ++ else ++ err = __open_file(inode, file); /* open a file */ ++ ++ /* freeing the allocated resources, and fput the opened files */ ++ if (err) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (!lower_file) ++ continue; ++ ++ branchput(dentry->d_sb, bindex); ++ /* fput calls dput for lower_dentry */ ++ fput(lower_file); ++ } ++ } ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ kfree(UNIONFS_F(file)); ++ } ++out_nofree: ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_copy_attr_times(inode); ++ unionfs_check_file(file); ++ unionfs_check_inode(inode); ++ } ++ if (dentry != dentry->d_parent) ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(inode->i_sb); ++ return err; ++} ++ ++/* ++ * release all lower object references & free the file info structure ++ * ++ * No need to grab sb info's rwsem. ++ */ ++int unionfs_file_release(struct inode *inode, struct file *file) ++{ ++ struct file *lower_file = NULL; ++ struct unionfs_file_info *fileinfo; ++ struct unionfs_inode_info *inodeinfo; ++ struct super_block *sb = inode->i_sb; ++ struct dentry *dentry = file->f_path.dentry; ++ int bindex, bstart, bend; ++ int fgen, err = 0; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* ++ * Yes, we have to revalidate this file even if it's being released. ++ * This is important for open-but-unlinked files, as well as mmap ++ * support. ++ */ ++ err = unionfs_file_revalidate(file, UNIONFS_F(file)->wrote_to_file); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ fileinfo = UNIONFS_F(file); ++ BUG_ON(file->f_path.dentry->d_inode != inode); ++ inodeinfo = UNIONFS_I(inode); ++ ++ /* fput all the lower files */ ++ fgen = atomic_read(&fileinfo->generation); ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (lower_file) { ++ unionfs_set_lower_file_idx(file, bindex, NULL); ++ fput(lower_file); ++ branchput(sb, bindex); ++ } ++ ++ /* if there are no more refs to the dentry, dput it */ ++ if (d_deleted(dentry)) { ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } ++ } ++ ++ kfree(fileinfo->lower_files); ++ kfree(fileinfo->saved_branch_ids); ++ ++ if (fileinfo->rdstate) { ++ fileinfo->rdstate->access = jiffies; ++ spin_lock(&inodeinfo->rdlock); ++ inodeinfo->rdcount++; ++ list_add_tail(&fileinfo->rdstate->cache, ++ &inodeinfo->readdircache); ++ mark_inode_dirty(inode); ++ spin_unlock(&inodeinfo->rdlock); ++ fileinfo->rdstate = NULL; ++ } ++ kfree(fileinfo); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(sb); ++ return err; ++} ++ ++/* pass the ioctl to the lower fs */ ++static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct file *lower_file; ++ int err; ++ ++ lower_file = unionfs_lower_file(file); ++ ++ err = -ENOTTY; ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->unlocked_ioctl) { ++ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); ++ } else if (lower_file->f_op->ioctl) { ++ lock_kernel(); ++ err = lower_file->f_op->ioctl( ++ lower_file->f_path.dentry->d_inode, ++ lower_file, cmd, arg); ++ unlock_kernel(); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * return to user-space the branch indices containing the file in question ++ * ++ * We use fd_set and therefore we are limited to the number of the branches ++ * to FD_SETSIZE, which is currently 1024 - plenty for most people ++ */ ++static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; ++ fd_set branchlist; ++ int bstart = 0, bend = 0, bindex = 0; ++ int orig_bstart, orig_bend; ++ struct dentry *dentry, *lower_dentry; ++ struct vfsmount *mnt; ++ ++ dentry = file->f_path.dentry; ++ orig_bstart = dbstart(dentry); ++ orig_bend = dbend(dentry); ++ err = unionfs_partial_lookup(dentry); ++ if (err) ++ goto out; ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ FD_ZERO(&branchlist); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (likely(lower_dentry->d_inode)) ++ FD_SET(bindex, &branchlist); ++ /* purge any lower objects after partial_lookup */ ++ if (bindex < orig_bstart || bindex > orig_bend) { ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ NULL); ++ mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (!mnt) ++ continue; ++ unionfs_mntput(dentry, bindex); ++ unionfs_set_lower_mnt_idx(dentry, bindex, NULL); ++ } ++ } ++ /* restore original dentry's offsets */ ++ dbstart(dentry) = orig_bstart; ++ dbend(dentry) = orig_bend; ++ ibstart(dentry->d_inode) = orig_bstart; ++ ibend(dentry->d_inode) = orig_bend; ++ ++ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set)); ++ if (unlikely(err)) ++ err = -EFAULT; ++ ++out: ++ return err < 0 ? err : bend; ++} ++ ++long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, true); ++ if (unlikely(err)) ++ goto out; ++ ++ /* check if asked for local commands */ ++ switch (cmd) { ++ case UNIONFS_IOCTL_INCGEN: ++ /* Increment the superblock generation count */ ++ pr_info("unionfs: incgen ioctl deprecated; " ++ "use \"-o remount,incgen\"\n"); ++ err = -ENOSYS; ++ break; ++ ++ case UNIONFS_IOCTL_QUERYFILE: ++ /* Return list of branches containing the given file */ ++ err = unionfs_ioctl_queryfile(file, cmd, arg); ++ break; ++ ++ default: ++ /* pass the ioctl down */ ++ err = do_ioctl(file, cmd, arg); ++ break; ++ } ++ ++out: ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++int unionfs_flush(struct file *file, fl_owner_t id) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ int bindex, bstart, bend; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, UNIONFS_F(file)->wrote_to_file); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (lower_file && lower_file->f_op && ++ lower_file->f_op->flush) { ++ err = lower_file->f_op->flush(lower_file, id); ++ if (err) ++ goto out; ++ } ++ ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c +new file mode 100644 +index 0000000..ae6ea2b +--- /dev/null ++++ b/fs/unionfs/copyup.c +@@ -0,0 +1,879 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * For detailed explanation of copyup see: ++ * Documentation/filesystems/unionfs/concepts.txt ++ */ ++ ++#ifdef CONFIG_UNION_FS_XATTR ++/* copyup all extended attrs for a given dentry */ ++static int copyup_xattrs(struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry) ++{ ++ int err = 0; ++ ssize_t list_size = -1; ++ char *name_list = NULL; ++ char *attr_value = NULL; ++ char *name_list_buf = NULL; ++ ++ /* query the actual size of the xattr list */ ++ list_size = vfs_listxattr(old_lower_dentry, NULL, 0); ++ if (list_size <= 0) { ++ err = list_size; ++ goto out; ++ } ++ ++ /* allocate space for the actual list */ ++ name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX); ++ if (unlikely(!name_list || IS_ERR(name_list))) { ++ err = PTR_ERR(name_list); ++ goto out; ++ } ++ ++ name_list_buf = name_list; /* save for kfree at end */ ++ ++ /* now get the actual xattr list of the source file */ ++ list_size = vfs_listxattr(old_lower_dentry, name_list, list_size); ++ if (list_size <= 0) { ++ err = list_size; ++ goto out; ++ } ++ ++ /* allocate space to hold each xattr's value */ ++ attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX); ++ if (unlikely(!attr_value || IS_ERR(attr_value))) { ++ err = PTR_ERR(name_list); ++ goto out; ++ } ++ ++ /* in a loop, get and set each xattr from src to dst file */ ++ while (*name_list) { ++ ssize_t size; ++ ++ /* Lock here since vfs_getxattr doesn't lock for us */ ++ mutex_lock(&old_lower_dentry->d_inode->i_mutex); ++ size = vfs_getxattr(old_lower_dentry, name_list, ++ attr_value, XATTR_SIZE_MAX); ++ mutex_unlock(&old_lower_dentry->d_inode->i_mutex); ++ if (size < 0) { ++ err = size; ++ goto out; ++ } ++ if (size > XATTR_SIZE_MAX) { ++ err = -E2BIG; ++ goto out; ++ } ++ /* Don't lock here since vfs_setxattr does it for us. */ ++ err = vfs_setxattr(new_lower_dentry, name_list, attr_value, ++ size, 0); ++ /* ++ * Selinux depends on "security.*" xattrs, so to maintain ++ * the security of copied-up files, if Selinux is active, ++ * then we must copy these xattrs as well. So we need to ++ * temporarily get FOWNER privileges. ++ * XXX: move entire copyup code to SIOQ. ++ */ ++ if (err == -EPERM && !capable(CAP_FOWNER)) { ++ cap_raise(current->cap_effective, CAP_FOWNER); ++ err = vfs_setxattr(new_lower_dentry, name_list, ++ attr_value, size, 0); ++ cap_lower(current->cap_effective, CAP_FOWNER); ++ } ++ if (err < 0) ++ goto out; ++ name_list += strlen(name_list) + 1; ++ } ++out: ++ unionfs_xattr_kfree(name_list_buf); ++ unionfs_xattr_kfree(attr_value); ++ /* Ignore if xattr isn't supported */ ++ if (err == -ENOTSUPP || err == -EOPNOTSUPP) ++ err = 0; ++ return err; ++} ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++/* ++ * Determine the mode based on the copyup flags, and the existing dentry. ++ * ++ * Handle file systems which may not support certain options. For example ++ * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless ++ * errors, rather than propagating them up, which results in copyup errors ++ * and errors returned back to users. ++ */ ++static int copyup_permissions(struct super_block *sb, ++ struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry) ++{ ++ struct inode *i = old_lower_dentry->d_inode; ++ struct iattr newattrs; ++ int err; ++ ++ newattrs.ia_atime = i->i_atime; ++ newattrs.ia_mtime = i->i_mtime; ++ newattrs.ia_ctime = i->i_ctime; ++ newattrs.ia_gid = i->i_gid; ++ newattrs.ia_uid = i->i_uid; ++ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME | ++ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE | ++ ATTR_GID | ATTR_UID; ++ mutex_lock(&new_lower_dentry->d_inode->i_mutex); ++ err = notify_change(new_lower_dentry, &newattrs); ++ if (err) ++ goto out; ++ ++ /* now try to change the mode and ignore EOPNOTSUPP on symlinks */ ++ newattrs.ia_mode = i->i_mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_FORCE; ++ err = notify_change(new_lower_dentry, &newattrs); ++ if (err == -EOPNOTSUPP && ++ S_ISLNK(new_lower_dentry->d_inode->i_mode)) { ++ printk(KERN_WARNING ++ "unionfs: changing \"%s\" symlink mode unsupported\n", ++ new_lower_dentry->d_name.name); ++ err = 0; ++ } ++ ++out: ++ mutex_unlock(&new_lower_dentry->d_inode->i_mutex); ++ return err; ++} ++ ++/* ++ * create the new device/file/directory - use copyup_permission to copyup ++ * times, and mode ++ * ++ * if the object being copied up is a regular file, the file is only created, ++ * the contents have to be copied up separately ++ */ ++static int __copyup_ndentry(struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry, ++ struct dentry *new_lower_parent_dentry, ++ char *symbuf) ++{ ++ int err = 0; ++ umode_t old_mode = old_lower_dentry->d_inode->i_mode; ++ struct sioq_args args; ++ ++ if (S_ISDIR(old_mode)) { ++ args.mkdir.parent = new_lower_parent_dentry->d_inode; ++ args.mkdir.dentry = new_lower_dentry; ++ args.mkdir.mode = old_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ } else if (S_ISLNK(old_mode)) { ++ args.symlink.parent = new_lower_parent_dentry->d_inode; ++ args.symlink.dentry = new_lower_dentry; ++ args.symlink.symbuf = symbuf; ++ ++ run_sioq(__unionfs_symlink, &args); ++ err = args.err; ++ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) || ++ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) { ++ args.mknod.parent = new_lower_parent_dentry->d_inode; ++ args.mknod.dentry = new_lower_dentry; ++ args.mknod.mode = old_mode; ++ args.mknod.dev = old_lower_dentry->d_inode->i_rdev; ++ ++ run_sioq(__unionfs_mknod, &args); ++ err = args.err; ++ } else if (S_ISREG(old_mode)) { ++ struct nameidata nd; ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ args.create.nd = &nd; ++ args.create.parent = new_lower_parent_dentry->d_inode; ++ args.create.dentry = new_lower_dentry; ++ args.create.mode = old_mode; ++ ++ run_sioq(__unionfs_create, &args); ++ err = args.err; ++ release_lower_nd(&nd, err); ++ } else { ++ printk(KERN_CRIT "unionfs: unknown inode type %d\n", ++ old_mode); ++ BUG(); ++ } ++ ++out: ++ return err; ++} ++ ++static int __copyup_reg_data(struct dentry *dentry, ++ struct dentry *new_lower_dentry, int new_bindex, ++ struct dentry *old_lower_dentry, int old_bindex, ++ struct file **copyup_file, loff_t len) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct file *input_file; ++ struct file *output_file; ++ struct vfsmount *output_mnt; ++ mm_segment_t old_fs; ++ char *buf = NULL; ++ ssize_t read_bytes, write_bytes; ++ loff_t size; ++ int err = 0; ++ ++ /* open old file */ ++ unionfs_mntget(dentry, old_bindex); ++ branchget(sb, old_bindex); ++ /* dentry_open calls dput and mntput if it returns an error */ ++ input_file = dentry_open(old_lower_dentry, ++ unionfs_lower_mnt_idx(dentry, old_bindex), ++ O_RDONLY | O_LARGEFILE); ++ if (IS_ERR(input_file)) { ++ dput(old_lower_dentry); ++ err = PTR_ERR(input_file); ++ goto out; ++ } ++ if (unlikely(!input_file->f_op || !input_file->f_op->read)) { ++ err = -EINVAL; ++ goto out_close_in; ++ } ++ ++ /* open new file */ ++ dget(new_lower_dentry); ++ output_mnt = unionfs_mntget(sb->s_root, new_bindex); ++ branchget(sb, new_bindex); ++ output_file = dentry_open(new_lower_dentry, output_mnt, ++ O_RDWR | O_LARGEFILE); ++ if (IS_ERR(output_file)) { ++ err = PTR_ERR(output_file); ++ goto out_close_in2; ++ } ++ if (unlikely(!output_file->f_op || !output_file->f_op->write)) { ++ err = -EINVAL; ++ goto out_close_out; ++ } ++ ++ /* allocating a buffer */ ++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out_close_out; ++ } ++ ++ input_file->f_pos = 0; ++ output_file->f_pos = 0; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ ++ size = len; ++ err = 0; ++ do { ++ if (len >= PAGE_SIZE) ++ size = PAGE_SIZE; ++ else if ((len < PAGE_SIZE) && (len > 0)) ++ size = len; ++ ++ len -= PAGE_SIZE; ++ ++ read_bytes = ++ input_file->f_op->read(input_file, ++ (char __user *)buf, size, ++ &input_file->f_pos); ++ if (read_bytes <= 0) { ++ err = read_bytes; ++ break; ++ } ++ ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ write_bytes = ++ output_file->f_op->write(output_file, ++ (char __user *)buf, ++ read_bytes, ++ &output_file->f_pos); ++ lockdep_on(); ++ if ((write_bytes < 0) || (write_bytes < read_bytes)) { ++ err = write_bytes; ++ break; ++ } ++ } while ((read_bytes > 0) && (len > 0)); ++ ++ set_fs(old_fs); ++ ++ kfree(buf); ++ ++ if (!err) ++ err = output_file->f_op->fsync(output_file, ++ new_lower_dentry, 0); ++ ++ if (err) ++ goto out_close_out; ++ ++ if (copyup_file) { ++ *copyup_file = output_file; ++ goto out_close_in; ++ } ++ ++out_close_out: ++ fput(output_file); ++ ++out_close_in2: ++ branchput(sb, new_bindex); ++ ++out_close_in: ++ fput(input_file); ++ ++out: ++ branchput(sb, old_bindex); ++ ++ return err; ++} ++ ++/* ++ * dput the lower references for old and new dentry & clear a lower dentry ++ * pointer ++ */ ++static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry, ++ int old_bstart, int old_bend, ++ struct dentry *new_lower_dentry, int new_bindex) ++{ ++ /* get rid of the lower dentry and all its traces */ ++ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL); ++ dbstart(dentry) = old_bstart; ++ dbend(dentry) = old_bend; ++ ++ dput(new_lower_dentry); ++ dput(old_lower_dentry); ++} ++ ++/* ++ * Copy up a dentry to a file of specified name. ++ * ++ * @dir: used to pull the ->i_sb to access other branches ++ * @dentry: the non-negative dentry whose lower_inode we should copy ++ * @bstart: the branch of the lower_inode to copy from ++ * @new_bindex: the branch to create the new file in ++ * @name: the name of the file to create ++ * @namelen: length of @name ++ * @copyup_file: the "struct file" to return (optional) ++ * @len: how many bytes to copy-up? ++ */ ++int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart, ++ int new_bindex, const char *name, int namelen, ++ struct file **copyup_file, loff_t len) ++{ ++ struct dentry *new_lower_dentry; ++ struct dentry *old_lower_dentry = NULL; ++ struct super_block *sb; ++ int err = 0; ++ int old_bindex; ++ int old_bstart; ++ int old_bend; ++ struct dentry *new_lower_parent_dentry = NULL; ++ mm_segment_t oldfs; ++ char *symbuf = NULL; ++ ++ verify_locked(dentry); ++ ++ old_bindex = bstart; ++ old_bstart = dbstart(dentry); ++ old_bend = dbend(dentry); ++ ++ BUG_ON(new_bindex < 0); ++ BUG_ON(new_bindex >= old_bindex); ++ ++ sb = dir->i_sb; ++ ++ err = is_robranch_super(sb, new_bindex); ++ if (err) ++ goto out; ++ ++ /* Create the directory structure above this dentry. */ ++ new_lower_dentry = create_parents(dir, dentry, name, new_bindex); ++ if (IS_ERR(new_lower_dentry)) { ++ err = PTR_ERR(new_lower_dentry); ++ goto out; ++ } ++ ++ old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex); ++ /* we conditionally dput this old_lower_dentry at end of function */ ++ dget(old_lower_dentry); ++ ++ /* For symlinks, we must read the link before we lock the directory. */ ++ if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) { ++ ++ symbuf = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (unlikely(!symbuf)) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = old_lower_dentry->d_inode->i_op->readlink( ++ old_lower_dentry, ++ (char __user *)symbuf, ++ PATH_MAX); ++ set_fs(oldfs); ++ if (err < 0) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ goto out_free; ++ } ++ symbuf[err] = '\0'; ++ } ++ ++ /* Now we lock the parent, and create the object in the new branch. */ ++ new_lower_parent_dentry = lock_parent(new_lower_dentry); ++ ++ /* create the new inode */ ++ err = __copyup_ndentry(old_lower_dentry, new_lower_dentry, ++ new_lower_parent_dentry, symbuf); ++ ++ if (err) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ goto out_unlock; ++ } ++ ++ /* We actually copyup the file here. */ ++ if (S_ISREG(old_lower_dentry->d_inode->i_mode)) ++ err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex, ++ old_lower_dentry, old_bindex, ++ copyup_file, len); ++ if (err) ++ goto out_unlink; ++ ++ /* Set permissions. */ ++ err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry); ++ if (err) ++ goto out_unlink; ++ ++#ifdef CONFIG_UNION_FS_XATTR ++ /* Selinux uses extended attributes for permissions. */ ++ err = copyup_xattrs(old_lower_dentry, new_lower_dentry); ++ if (err) ++ goto out_unlink; ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++ /* do not allow files getting deleted to be re-interposed */ ++ if (!d_deleted(dentry)) ++ unionfs_reinterpose(dentry); ++ ++ goto out_unlock; ++ ++out_unlink: ++ /* ++ * copyup failed, because we possibly ran out of space or ++ * quota, or something else happened so let's unlink; we don't ++ * really care about the return value of vfs_unlink ++ */ ++ vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry); ++ ++ if (copyup_file) { ++ /* need to close the file */ ++ ++ fput(*copyup_file); ++ branchput(sb, new_bindex); ++ } ++ ++ /* ++ * TODO: should we reset the error to something like -EIO? ++ * ++ * If we don't reset, the user may get some nonsensical errors, but ++ * on the other hand, if we reset to EIO, we guarantee that the user ++ * will get a "confusing" error message. ++ */ ++ ++out_unlock: ++ unlock_dir(new_lower_parent_dentry); ++ ++out_free: ++ /* ++ * If old_lower_dentry was not a file, then we need to dput it. If ++ * it was a file, then it was already dput indirectly by other ++ * functions we call above which operate on regular files. ++ */ ++ if (old_lower_dentry && old_lower_dentry->d_inode && ++ !S_ISREG(old_lower_dentry->d_inode->i_mode)) ++ dput(old_lower_dentry); ++ kfree(symbuf); ++ ++ if (err) ++ goto out; ++ if (!S_ISDIR(dentry->d_inode->i_mode)) { ++ unionfs_postcopyup_release(dentry); ++ if (!unionfs_lower_inode(dentry->d_inode)) { ++ /* ++ * If we got here, then we copied up to an ++ * unlinked-open file, whose name is .unionfsXXXXX. ++ */ ++ struct inode *inode = new_lower_dentry->d_inode; ++ atomic_inc(&inode->i_count); ++ unionfs_set_lower_inode_idx(dentry->d_inode, ++ ibstart(dentry->d_inode), ++ inode); ++ } ++ } ++ unionfs_postcopyup_setmnt(dentry); ++ /* sync inode times from copied-up inode to our inode */ ++ unionfs_copy_attr_times(dentry->d_inode); ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++out: ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex.' The copy ++ * will be named "name". ++ */ ++int copyup_named_file(struct inode *dir, struct file *file, char *name, ++ int bstart, int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ ++ err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex, ++ name, strlen(name), &output_file, len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex'. ++ */ ++int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ err = copyup_dentry(dir, dentry, bstart, new_bindex, ++ dentry->d_name.name, dentry->d_name.len, ++ &output_file, len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* purge a dentry's lower-branch states (dput/mntput, etc.) */ ++static void __cleanup_dentry(struct dentry *dentry, int bindex, ++ int old_bstart, int old_bend) ++{ ++ int loop_start; ++ int loop_end; ++ int new_bstart = -1; ++ int new_bend = -1; ++ int i; ++ ++ loop_start = min(old_bstart, bindex); ++ loop_end = max(old_bend, bindex); ++ ++ /* ++ * This loop sets the bstart and bend for the new dentry by ++ * traversing from left to right. It also dputs all negative ++ * dentries except bindex ++ */ ++ for (i = loop_start; i <= loop_end; i++) { ++ if (!unionfs_lower_dentry_idx(dentry, i)) ++ continue; ++ ++ if (i == bindex) { ++ new_bend = i; ++ if (new_bstart < 0) ++ new_bstart = i; ++ continue; ++ } ++ ++ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ ++ unionfs_mntput(dentry, i); ++ unionfs_set_lower_mnt_idx(dentry, i, NULL); ++ } else { ++ if (new_bstart < 0) ++ new_bstart = i; ++ new_bend = i; ++ } ++ } ++ ++ if (new_bstart < 0) ++ new_bstart = bindex; ++ if (new_bend < 0) ++ new_bend = bindex; ++ dbstart(dentry) = new_bstart; ++ dbend(dentry) = new_bend; ++ ++} ++ ++/* set lower inode ptr and update bstart & bend if necessary */ ++static void __set_inode(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_inode_idx(upper->d_inode, bindex, ++ igrab(lower->d_inode)); ++ if (likely(ibstart(upper->d_inode) > bindex)) ++ ibstart(upper->d_inode) = bindex; ++ if (likely(ibend(upper->d_inode) < bindex)) ++ ibend(upper->d_inode) = bindex; ++ ++} ++ ++/* set lower dentry ptr and update bstart & bend if necessary */ ++static void __set_dentry(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_dentry_idx(upper, bindex, lower); ++ if (likely(dbstart(upper) > bindex)) ++ dbstart(upper) = bindex; ++ if (likely(dbend(upper) < bindex)) ++ dbend(upper) = bindex; ++} ++ ++/* ++ * This function replicates the directory structure up-to given dentry ++ * in the bindex branch. ++ */ ++struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ const char *name, int bindex) ++{ ++ int err; ++ struct dentry *child_dentry; ++ struct dentry *parent_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *lower_dentry = NULL; ++ const char *childname; ++ unsigned int childnamelen; ++ int nr_dentry; ++ int count = 0; ++ int old_bstart; ++ int old_bend; ++ struct dentry **path = NULL; ++ struct super_block *sb; ++ ++ verify_locked(dentry); ++ ++ err = is_robranch_super(dir->i_sb, bindex); ++ if (err) { ++ lower_dentry = ERR_PTR(err); ++ goto out; ++ } ++ ++ old_bstart = dbstart(dentry); ++ old_bend = dbend(dentry); ++ ++ lower_dentry = ERR_PTR(-ENOMEM); ++ ++ /* There is no sense allocating any less than the minimum. */ ++ nr_dentry = 1; ++ path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL); ++ if (unlikely(!path)) ++ goto out; ++ ++ /* assume the negative dentry of unionfs as the parent dentry */ ++ parent_dentry = dentry; ++ ++ /* ++ * This loop finds the first parent that exists in the given branch. ++ * We start building the directory structure from there. At the end ++ * of the loop, the following should hold: ++ * - child_dentry is the first nonexistent child ++ * - parent_dentry is the first existent parent ++ * - path[0] is the = deepest child ++ * - path[count] is the first child to create ++ */ ++ do { ++ child_dentry = parent_dentry; ++ ++ /* find the parent directory dentry in unionfs */ ++ parent_dentry = dget_parent(child_dentry); ++ ++ /* find out the lower_parent_dentry in the given branch */ ++ lower_parent_dentry = ++ unionfs_lower_dentry_idx(parent_dentry, bindex); ++ ++ /* grow path table */ ++ if (count == nr_dentry) { ++ void *p; ++ ++ nr_dentry *= 2; ++ p = krealloc(path, nr_dentry * sizeof(struct dentry *), ++ GFP_KERNEL); ++ if (unlikely(!p)) { ++ lower_dentry = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ path = p; ++ } ++ ++ /* store the child dentry */ ++ path[count++] = child_dentry; ++ } while (!lower_parent_dentry); ++ count--; ++ ++ sb = dentry->d_sb; ++ ++ /* ++ * This code goes between the begin/end labels and basically ++ * emulates a while(child_dentry != dentry), only cleaner and ++ * shorter than what would be a much longer while loop. ++ */ ++begin: ++ /* get lower parent dir in the current branch */ ++ lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex); ++ dput(parent_dentry); ++ ++ /* init the values to lookup */ ++ childname = child_dentry->d_name.name; ++ childnamelen = child_dentry->d_name.len; ++ ++ if (child_dentry != dentry) { ++ /* lookup child in the underlying file system */ ++ lower_dentry = lookup_one_len(childname, lower_parent_dentry, ++ childnamelen); ++ if (IS_ERR(lower_dentry)) ++ goto out; ++ } else { ++ /* ++ * Is the name a whiteout of the child name ? lookup the ++ * whiteout child in the underlying file system ++ */ ++ lower_dentry = lookup_one_len(name, lower_parent_dentry, ++ strlen(name)); ++ if (IS_ERR(lower_dentry)) ++ goto out; ++ ++ /* Replace the current dentry (if any) with the new one */ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, ++ lower_dentry); ++ ++ __cleanup_dentry(dentry, bindex, old_bstart, old_bend); ++ goto out; ++ } ++ ++ if (lower_dentry->d_inode) { ++ /* ++ * since this already exists we dput to avoid ++ * multiple references on the same dentry ++ */ ++ dput(lower_dentry); ++ } else { ++ struct sioq_args args; ++ ++ /* it's a negative dentry, create a new dir */ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ args.mkdir.parent = lower_parent_dentry->d_inode; ++ args.mkdir.dentry = lower_dentry; ++ args.mkdir.mode = child_dentry->d_inode->i_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ ++ if (!err) ++ err = copyup_permissions(dir->i_sb, child_dentry, ++ lower_dentry); ++ unlock_dir(lower_parent_dentry); ++ if (err) { ++ dput(lower_dentry); ++ lower_dentry = ERR_PTR(err); ++ goto out; ++ } ++ ++ } ++ ++ __set_inode(child_dentry, lower_dentry, bindex); ++ __set_dentry(child_dentry, lower_dentry, bindex); ++ /* ++ * update times of this dentry, but also the parent, because if ++ * we changed, the parent may have changed too. ++ */ ++ fsstack_copy_attr_times(parent_dentry->d_inode, ++ lower_parent_dentry->d_inode); ++ unionfs_copy_attr_times(child_dentry->d_inode); ++ ++ parent_dentry = child_dentry; ++ child_dentry = path[--count]; ++ goto begin; ++out: ++ /* cleanup any leftover locks from the do/while loop above */ ++ if (IS_ERR(lower_dentry)) ++ while (count) ++ dput(path[count--]); ++ kfree(path); ++ return lower_dentry; ++} ++ ++/* ++ * Post-copyup helper to ensure we have valid mnts: set lower mnt of ++ * dentry+parents to the first parent node that has an mnt. ++ */ ++void unionfs_postcopyup_setmnt(struct dentry *dentry) ++{ ++ struct dentry *parent, *hasone; ++ int bindex = dbstart(dentry); ++ ++ if (unionfs_lower_mnt_idx(dentry, bindex)) ++ return; ++ hasone = dentry->d_parent; ++ /* this loop should stop at root dentry */ ++ while (!unionfs_lower_mnt_idx(hasone, bindex)) ++ hasone = hasone->d_parent; ++ parent = dentry; ++ while (!unionfs_lower_mnt_idx(parent, bindex)) { ++ unionfs_set_lower_mnt_idx(parent, bindex, ++ unionfs_mntget(hasone, bindex)); ++ parent = parent->d_parent; ++ } ++} ++ ++/* ++ * Post-copyup helper to release all non-directory source objects of a ++ * copied-up file. Regular files should have only one lower object. ++ */ ++void unionfs_postcopyup_release(struct dentry *dentry) ++{ ++ int bstart, bend; ++ ++ BUG_ON(S_ISDIR(dentry->d_inode->i_mode)); ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ path_put_lowers(dentry, bstart + 1, bend, false); ++ iput_lowers(dentry->d_inode, bstart + 1, bend, false); ++ ++ dbend(dentry) = bstart; ++ ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bstart; ++} +diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c +new file mode 100644 +index 0000000..db62d22 +--- /dev/null ++++ b/fs/unionfs/debug.c +@@ -0,0 +1,533 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Helper debugging functions for maintainers (and for users to report back ++ * useful information back to maintainers) ++ */ ++ ++/* it's always useful to know what part of the code called us */ ++#define PRINT_CALLER(fname, fxn, line) \ ++ do { \ ++ if (!printed_caller) { \ ++ pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \ ++ printed_caller = 1; \ ++ } \ ++ } while (0) ++ ++/* ++ * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on ++ * the fan-out of various Unionfs objects. We check that no lower objects ++ * exist outside the start/end branch range; that all objects within are ++ * non-NULL (with some allowed exceptions); that for every lower file ++ * there's a lower dentry+inode; that the start/end ranges match for all ++ * corresponding lower objects; that open files/symlinks have only one lower ++ * objects, but directories can have several; and more. ++ */ ++void __unionfs_check_inode(const struct inode *inode, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int istart, iend; ++ struct inode *lower_inode; ++ struct super_block *sb; ++ int printed_caller = 0; ++ void *poison_ptr; ++ ++ /* for inodes now */ ++ BUG_ON(!inode); ++ sb = inode->i_sb; ++ istart = ibstart(inode); ++ iend = ibend(inode); ++ /* don't check inode if no lower branches */ ++ if (istart < 0 && iend < 0) ++ return; ++ if (unlikely(istart > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci0: inode=%p istart/end=%d:%d\n", ++ inode, istart, iend); ++ } ++ if (unlikely((istart == -1 && iend != -1) || ++ (istart != -1 && iend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci1: inode=%p istart/end=%d:%d\n", ++ inode, istart, iend); ++ } ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(iend != istart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci2: inode=%p istart=%d iend=%d\n", ++ inode, istart, iend); ++ } ++ } ++ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ if (unlikely(!UNIONFS_I(inode))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci3: no inode_info %p\n", inode); ++ return; ++ } ++ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci4: no lower_inodes %p\n", inode); ++ return; ++ } ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ memset(&poison_ptr, POISON_INUSE, sizeof(void *)); ++ if (unlikely(bindex < istart || bindex > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci5: inode/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", inode, ++ lower_inode, bindex, istart, iend); ++ } else if (unlikely(lower_inode == poison_ptr)) { ++ /* freed inode! */ ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci6: inode/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", inode, ++ lower_inode, bindex, istart, iend); ++ } ++ continue; ++ } ++ /* if we get here, then lower_inode == NULL */ ++ if (bindex < istart || bindex > iend) ++ continue; ++ /* ++ * directories can have NULL lower inodes in b/t start/end, ++ * but NOT if at the start/end range. ++ */ ++ if (unlikely(S_ISDIR(inode->i_mode) && ++ bindex > istart && bindex < iend)) ++ continue; ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci7: inode/linode=%p:%p " ++ "bindex=%d istart/end=%d:%d\n", ++ inode, lower_inode, bindex, istart, iend); ++ } ++} ++ ++void __unionfs_check_dentry(const struct dentry *dentry, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int dstart, dend, istart, iend; ++ struct dentry *lower_dentry; ++ struct inode *inode, *lower_inode; ++ struct super_block *sb; ++ struct vfsmount *lower_mnt; ++ int printed_caller = 0; ++ void *poison_ptr; ++ ++ BUG_ON(!dentry); ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ /* don't check dentry/mnt if no lower branches */ ++ if (dstart < 0 && dend < 0) ++ goto check_inode; ++ BUG_ON(dstart > dend); ++ ++ if (unlikely((dstart == -1 && dend != -1) || ++ (dstart != -1 && dend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n", ++ dentry, dstart, dend); ++ } ++ /* ++ * check for NULL dentries inside the start/end range, or ++ * non-NULL dentries outside the start/end range. ++ */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (lower_dentry) { ++ if (unlikely(bindex < dstart || bindex > dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD1: dentry/lower=%p:%p(%p) " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_dentry, ++ (lower_dentry ? lower_dentry->d_inode : ++ (void *) -1L), ++ bindex, dstart, dend); ++ } ++ } else { /* lower_dentry == NULL */ ++ if (bindex < dstart || bindex > dend) ++ continue; ++ /* ++ * Directories can have NULL lower inodes in b/t ++ * start/end, but NOT if at the start/end range. ++ * Ignore this rule, however, if this is a NULL ++ * dentry or a deleted dentry. ++ */ ++ if (unlikely(!d_deleted((struct dentry *) dentry) && ++ inode && ++ !(inode && S_ISDIR(inode->i_mode) && ++ bindex > dstart && bindex < dend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD2: dentry/lower=%p:%p(%p) " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_dentry, ++ (lower_dentry ? ++ lower_dentry->d_inode : ++ (void *) -1L), ++ bindex, dstart, dend); ++ } ++ } ++ } ++ ++ /* check for vfsmounts same as for dentries */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (lower_mnt) { ++ if (unlikely(bindex < dstart || bindex > dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d " ++ "dstart/end=%d:%d\n", dentry, ++ lower_mnt, bindex, dstart, dend); ++ } ++ } else { /* lower_mnt == NULL */ ++ if (bindex < dstart || bindex > dend) ++ continue; ++ /* ++ * Directories can have NULL lower inodes in b/t ++ * start/end, but NOT if at the start/end range. ++ * Ignore this rule, however, if this is a NULL ++ * dentry. ++ */ ++ if (unlikely(inode && ++ !(inode && S_ISDIR(inode->i_mode) && ++ bindex > dstart && bindex < dend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CM1: dentry/lmnt=%p:%p " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_mnt, bindex, ++ dstart, dend); ++ } ++ } ++ } ++ ++check_inode: ++ /* for inodes now */ ++ if (!inode) ++ return; ++ istart = ibstart(inode); ++ iend = ibend(inode); ++ /* don't check inode if no lower branches */ ++ if (istart < 0 && iend < 0) ++ return; ++ BUG_ON(istart > iend); ++ if (unlikely((istart == -1 && iend != -1) || ++ (istart != -1 && iend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n", ++ dentry, inode, istart, iend); ++ } ++ if (unlikely(istart != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n", ++ dentry, inode, istart, dstart); ++ } ++ if (unlikely(iend != dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n", ++ dentry, inode, iend, dend); ++ } ++ ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(dend != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n", ++ dentry, inode, dstart, dend); ++ } ++ if (unlikely(iend != istart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n", ++ dentry, inode, istart, iend); ++ } ++ } ++ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ memset(&poison_ptr, POISON_INUSE, sizeof(void *)); ++ if (unlikely(bindex < istart || bindex > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI5: dentry/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", dentry, ++ lower_inode, bindex, istart, iend); ++ } else if (unlikely(lower_inode == poison_ptr)) { ++ /* freed inode! */ ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI6: dentry/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", dentry, ++ lower_inode, bindex, istart, iend); ++ } ++ continue; ++ } ++ /* if we get here, then lower_inode == NULL */ ++ if (bindex < istart || bindex > iend) ++ continue; ++ /* ++ * directories can have NULL lower inodes in b/t start/end, ++ * but NOT if at the start/end range. ++ */ ++ if (unlikely(S_ISDIR(inode->i_mode) && ++ bindex > istart && bindex < iend)) ++ continue; ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI7: dentry/linode=%p:%p " ++ "bindex=%d istart/end=%d:%d\n", ++ dentry, lower_inode, bindex, istart, iend); ++ } ++ ++ /* ++ * If it's a directory, then intermediate objects b/t start/end can ++ * be NULL. But, check that all three are NULL: lower dentry, mnt, ++ * and inode. ++ */ ++ if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode)) ++ for (bindex = dstart+1; bindex < dend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ lower_dentry = unionfs_lower_dentry_idx(dentry, ++ bindex); ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (unlikely(!((lower_inode && lower_dentry && ++ lower_mnt) || ++ (!lower_inode && ++ !lower_dentry && !lower_mnt)))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p " ++ "bindex=%d dstart/end=%d:%d\n", ++ lower_mnt, lower_dentry, lower_inode, ++ bindex, dstart, dend); ++ } ++ } ++ /* check if lower inode is newer than upper one (it shouldn't) */ ++ if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) { ++ PRINT_CALLER(fname, fxn, line); ++ for (bindex = ibstart(inode); bindex <= ibend(inode); ++ bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu " ++ "ctime/lctime=%lu.%lu/%lu.%lu\n", ++ bindex, ++ inode->i_mtime.tv_sec, ++ inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, ++ inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++ } ++} ++ ++void __unionfs_check_file(const struct file *file, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int dstart, dend, fstart, fend; ++ struct dentry *dentry; ++ struct file *lower_file; ++ struct inode *inode; ++ struct super_block *sb; ++ int printed_caller = 0; ++ ++ BUG_ON(!file); ++ dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ BUG_ON(dstart > dend); ++ fstart = fbstart(file); ++ fend = fbend(file); ++ BUG_ON(fstart > fend); ++ ++ if (unlikely((fstart == -1 && fend != -1) || ++ (fstart != -1 && fend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n", ++ file, dentry, fstart, fend); ++ } ++ if (unlikely(fstart != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n", ++ file, dentry, fstart, dstart); ++ } ++ if (unlikely(fend != dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n", ++ file, dentry, fend, dend); ++ } ++ inode = dentry->d_inode; ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(fend != fstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n", ++ file, inode, fstart, fend); ++ } ++ if (unlikely(dend != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n", ++ file, dentry, dstart, dend); ++ } ++ } ++ ++ /* ++ * check for NULL dentries inside the start/end range, or ++ * non-NULL dentries outside the start/end range. ++ */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (lower_file) { ++ if (unlikely(bindex < fstart || bindex > fend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF5: file/lower=%p:%p bindex=%d " ++ "fstart/end=%d:%d\n", file, ++ lower_file, bindex, fstart, fend); ++ } ++ } else { /* lower_file == NULL */ ++ if (bindex >= fstart && bindex <= fend) { ++ /* ++ * directories can have NULL lower inodes in ++ * b/t start/end, but NOT if at the ++ * start/end range. ++ */ ++ if (unlikely(!(S_ISDIR(inode->i_mode) && ++ bindex > fstart && ++ bindex < fend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF6: file/lower=%p:%p " ++ "bindex=%d fstart/end=%d:%d\n", ++ file, lower_file, bindex, ++ fstart, fend); ++ } ++ } ++ } ++ } ++ ++ __unionfs_check_dentry(dentry, fname, fxn, line); ++} ++ ++void __unionfs_check_nd(const struct nameidata *nd, ++ const char *fname, const char *fxn, int line) ++{ ++ struct file *file; ++ int printed_caller = 0; ++ ++ if (unlikely(!nd)) ++ return; ++ if (nd->flags & LOOKUP_OPEN) { ++ file = nd->intent.open.file; ++ if (unlikely(file->f_path.dentry && ++ strcmp(file->f_path.dentry->d_sb->s_type->name, ++ UNIONFS_NAME))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CND1: lower_file of type %s\n", ++ file->f_path.dentry->d_sb->s_type->name); ++ BUG(); ++ } ++ } ++} ++ ++/* useful to track vfsmount leaks that could cause EBUSY on unmount */ ++void __show_branch_counts(const struct super_block *sb, ++ const char *file, const char *fxn, int line) ++{ ++ int i; ++ struct vfsmount *mnt; ++ ++ pr_debug("BC:"); ++ for (i = 0; i < sbmax(sb); i++) { ++ if (likely(sb->s_root)) ++ mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt; ++ else ++ mnt = NULL; ++ printk(KERN_CONT "%d:", ++ (mnt ? atomic_read(&mnt->mnt_count) : -99)); ++ } ++ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line); ++} ++ ++void __show_inode_times(const struct inode *inode, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug("IT(%lu:%d): %s:%s:%d " ++ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n", ++ inode->i_ino, bindex, ++ file, fxn, line, ++ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++} ++ ++void __show_dinode_times(const struct dentry *dentry, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct inode *lower_inode; ++ int bindex; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ pr_debug("DT(%s:%lu:%d): %s:%s:%d " ++ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n", ++ dentry->d_name.name, inode->i_ino, bindex, ++ file, fxn, line, ++ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++} ++ ++void __show_inode_counts(const struct inode *inode, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ if (unlikely(!inode)) { ++ pr_debug("SiC: Null inode\n"); ++ return; ++ } ++ for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb); ++ bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n", ++ inode->i_ino, bindex, ++ atomic_read(&(inode)->i_count), ++ atomic_read(&(lower_inode)->i_count), ++ file, fxn, line); ++ } ++} +diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c +new file mode 100644 +index 0000000..7f0c7f7 +--- /dev/null ++++ b/fs/unionfs/dentry.c +@@ -0,0 +1,570 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++bool is_negative_lower(const struct dentry *dentry) ++{ ++ int bindex; ++ struct dentry *lower_dentry; ++ ++ BUG_ON(!dentry); ++ /* cache coherency: check if file was deleted on lower branch */ ++ if (dbstart(dentry) < 0) ++ return true; ++ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ /* unhashed (i.e., unlinked) lower dentries don't count */ ++ if (lower_dentry && lower_dentry->d_inode && ++ !d_deleted(lower_dentry) && ++ !(lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)) ++ return false; ++ } ++ return true; ++} ++ ++static inline void __dput_lowers(struct dentry *dentry, int start, int end) ++{ ++ struct dentry *lower_dentry; ++ int bindex; ++ ++ if (start < 0) ++ return; ++ for (bindex = start; bindex <= end; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ dput(lower_dentry); ++ } ++} ++ ++/* ++ * Revalidate a single dentry. ++ * Assume that dentry's info node is locked. ++ * Assume that parent(s) are all valid already, but ++ * the child may not yet be valid. ++ * Returns true if valid, false otherwise. ++ */ ++static bool __unionfs_d_revalidate_one(struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ bool valid = true; /* default is valid */ ++ struct dentry *lower_dentry; ++ int bindex, bstart, bend; ++ int sbgen, dgen; ++ int positive = 0; ++ int interpose_flag; ++ struct nameidata lowernd; /* TODO: be gentler to the stack */ ++ ++ if (nd) ++ memcpy(&lowernd, nd, sizeof(struct nameidata)); ++ else ++ memset(&lowernd, 0, sizeof(struct nameidata)); ++ ++ verify_locked(dentry); ++ verify_locked(dentry->d_parent); ++ ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ /* if the dentry is unhashed, do NOT revalidate */ ++ if (d_deleted(dentry)) ++ goto out; ++ ++ BUG_ON(dbstart(dentry) == -1); ++ if (dentry->d_inode) ++ positive = 1; ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ /* ++ * If we are working on an unconnected dentry, then there is no ++ * revalidation to be done, because this file does not exist within ++ * the namespace, and Unionfs operates on the namespace, not data. ++ */ ++ if (unlikely(sbgen != dgen)) { ++ struct dentry *result; ++ int pdgen; ++ ++ /* The root entry should always be valid */ ++ BUG_ON(IS_ROOT(dentry)); ++ ++ /* We can't work correctly if our parent isn't valid. */ ++ pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation); ++ BUG_ON(pdgen != sbgen); /* should never happen here */ ++ ++ /* Free the pointers for our inodes and this dentry. */ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ /* ++ * mntput unhashed lower dentries, because those files got ++ * deleted or rmdir'ed. ++ */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (!d_deleted(lower_dentry) && ++ !(lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)) ++ continue; ++ unionfs_mntput(dentry, bindex); ++ } ++ ++ __dput_lowers(dentry, bstart, bend); ++ dbstart(dentry) = dbend(dentry) = -1; ++ ++ interpose_flag = INTERPOSE_REVAL_NEG; ++ if (positive) { ++ interpose_flag = INTERPOSE_REVAL; ++ iput_lowers_all(dentry->d_inode, true); ++ } ++ ++ if (realloc_dentry_private_data(dentry) != 0) { ++ valid = false; ++ goto out; ++ } ++ ++ result = unionfs_lookup_full(dentry, &lowernd, interpose_flag); ++ if (result) { ++ if (IS_ERR(result)) { ++ valid = false; ++ goto out; ++ } ++ /* ++ * current unionfs_lookup_backend() doesn't return ++ * a valid dentry ++ */ ++ dput(dentry); ++ dentry = result; ++ } ++ ++ if (unlikely(positive && is_negative_lower(dentry))) { ++ make_bad_inode(dentry->d_inode); ++ d_drop(dentry); ++ valid = false; ++ goto out; ++ } ++ goto out; ++ } ++ ++ /* The revalidation must occur across all branches */ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ BUG_ON(bstart == -1); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry || !lower_dentry->d_op ++ || !lower_dentry->d_op->d_revalidate) ++ continue; ++ /* ++ * Don't pass nameidata to lower file system, because we ++ * don't want an arbitrary lower file being opened or ++ * returned to us: it may be useless to us because of the ++ * fanout nature of unionfs (cf. file/directory open-file ++ * invariants). We will open lower files as and when needed ++ * later on. ++ */ ++ if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL)) ++ valid = false; ++ } ++ ++ if (!dentry->d_inode || ++ ibstart(dentry->d_inode) < 0 || ++ ibend(dentry->d_inode) < 0) { ++ valid = false; ++ goto out; ++ } ++ ++ if (valid) { ++ /* ++ * If we get here, and we copy the meta-data from the lower ++ * inode to our inode, then it is vital that we have already ++ * purged all unionfs-level file data. We do that in the ++ * caller (__unionfs_d_revalidate_chain) by calling ++ * purge_inode_data. ++ */ ++ unionfs_copy_attr_all(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode)); ++ fsstack_copy_inode_size(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode)); ++ } ++ ++out: ++ if (valid) ++ atomic_set(&UNIONFS_D(dentry)->generation, sbgen); ++ ++ return valid; ++} ++ ++/* ++ * Determine if the lower inode objects have changed from below the unionfs ++ * inode. Return true if changed, false otherwise. ++ * ++ * We check if the mtime or ctime have changed. However, the inode times ++ * can be changed by anyone without much protection, including ++ * asynchronously. This can sometimes cause unionfs to find that the lower ++ * file system doesn't change its inode times quick enough, resulting in a ++ * false positive indication (which is harmless, it just makes unionfs do ++ * extra work in re-validating the objects). To minimize the chances of ++ * these situations, we still consider such small time changes valid, but we ++ * don't print debugging messages unless the time changes are greater than ++ * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin) ++ * because significant changes are more likely due to users manually ++ * touching lower files. ++ */ ++bool is_newer_lower(const struct dentry *dentry) ++{ ++ int bindex; ++ struct inode *inode; ++ struct inode *lower_inode; ++ ++ /* ignore if we're called on semi-initialized dentries/inodes */ ++ if (!dentry || !UNIONFS_D(dentry)) ++ return false; ++ inode = dentry->d_inode; ++ if (!inode || !UNIONFS_I(inode)->lower_inodes || ++ ibstart(inode) < 0 || ibend(inode) < 0) ++ return false; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ /* check if mtime/ctime have changed */ ++ if (unlikely(timespec_compare(&inode->i_mtime, ++ &lower_inode->i_mtime) < 0)) { ++ if ((lower_inode->i_mtime.tv_sec - ++ inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) { ++ pr_info("unionfs: new lower inode mtime " ++ "(bindex=%d, name=%s)\n", bindex, ++ dentry->d_name.name); ++ show_dinode_times(dentry); ++ } ++ return true; ++ } ++ if (unlikely(timespec_compare(&inode->i_ctime, ++ &lower_inode->i_ctime) < 0)) { ++ if ((lower_inode->i_ctime.tv_sec - ++ inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) { ++ pr_info("unionfs: new lower inode ctime " ++ "(bindex=%d, name=%s)\n", bindex, ++ dentry->d_name.name); ++ show_dinode_times(dentry); ++ } ++ return true; ++ } ++ } ++ ++ /* ++ * Last check: if this is a positive dentry, but somehow all lower ++ * dentries are negative or unhashed, then this dentry needs to be ++ * revalidated, because someone probably deleted the objects from ++ * the lower branches directly. ++ */ ++ if (is_negative_lower(dentry)) ++ return true; ++ ++ return false; /* default: lower is not newer */ ++} ++ ++/* ++ * Purge and invalidate as many data pages of a unionfs inode. This is ++ * called when the lower inode has changed, and we want to force processes ++ * to re-get the new data. ++ */ ++static inline void purge_inode_data(struct inode *inode) ++{ ++ /* remove all non-private mappings */ ++ unmap_mapping_range(inode->i_mapping, 0, 0, 0); ++ /* invalidate as many pages as possible */ ++ invalidate_mapping_pages(inode->i_mapping, 0, -1); ++ /* ++ * Don't try to truncate_inode_pages here, because this could lead ++ * to a deadlock between some of address_space ops and dentry ++ * revalidation: the address space op is invoked with a lock on our ++ * own page, and truncate_inode_pages will block on locked pages. ++ */ ++} ++ ++/* ++ * Revalidate a single file/symlink/special dentry. Assume that info nodes ++ * of the dentry and its parent are locked. Assume that parent(s) are all ++ * valid already, but the child may not yet be valid. Returns true if ++ * valid, false otherwise. ++ */ ++bool __unionfs_d_revalidate_one_locked(struct dentry *dentry, ++ struct nameidata *nd, ++ bool willwrite) ++{ ++ bool valid = false; /* default is invalid */ ++ int sbgen, dgen, bindex; ++ ++ verify_locked(dentry); ++ verify_locked(dentry->d_parent); ++ ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ ++ if (unlikely(is_newer_lower(dentry))) { ++ /* root dentry special case as aforementioned */ ++ if (IS_ROOT(dentry)) { ++ unionfs_copy_attr_times(dentry->d_inode); ++ } else { ++ /* ++ * reset generation number to zero, guaranteed to be ++ * "old" ++ */ ++ dgen = 0; ++ atomic_set(&UNIONFS_D(dentry)->generation, dgen); ++ } ++ if (!willwrite) ++ purge_inode_data(dentry->d_inode); ++ } ++ valid = __unionfs_d_revalidate_one(dentry, nd); ++ ++ /* ++ * If __unionfs_d_revalidate_one() succeeded above, then it will ++ * have incremented the refcnt of the mnt's, but also the branch ++ * indices of the dentry will have been updated (to take into ++ * account any branch insertions/deletion. So the current ++ * dbstart/dbend match the current, and new, indices of the mnts ++ * which __unionfs_d_revalidate_one has incremented. Note: the "if" ++ * test below does not depend on whether chain_len was 0 or greater. ++ */ ++ if (!valid || sbgen == dgen) ++ goto out; ++ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) ++ unionfs_mntput(dentry, bindex); ++out: ++ return valid; ++} ++ ++/* ++ * Revalidate a parent chain of dentries, then the actual node. ++ * Assumes that dentry is locked, but will lock all parents if/when needed. ++ * ++ * If 'willwrite' is true, and the lower inode times are not in sync, then ++ * *don't* purge_inode_data, as it could deadlock if ->write calls us and we ++ * try to truncate a locked page. Besides, if unionfs is about to write ++ * data to a file, then there's the data unionfs is about to write is more ++ * authoritative than what's below, therefore we can safely overwrite the ++ * lower inode times and data. ++ */ ++bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd, ++ bool willwrite) ++{ ++ bool valid = false; /* default is invalid */ ++ struct dentry **chain = NULL; /* chain of dentries to reval */ ++ int chain_len = 0; ++ struct dentry *dtmp; ++ int sbgen, dgen, i; ++ int saved_bstart, saved_bend, bindex; ++ ++ /* find length of chain needed to revalidate */ ++ /* XXX: should I grab some global (dcache?) lock? */ ++ chain_len = 0; ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ dtmp = dentry->d_parent; ++ verify_locked(dentry); ++ if (dentry != dtmp) ++ unionfs_lock_dentry(dtmp, UNIONFS_DMUTEX_REVAL_PARENT); ++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation); ++ /* XXX: should we check if is_newer_lower all the way up? */ ++ if (unlikely(is_newer_lower(dtmp))) { ++ /* ++ * Special case: the root dentry's generation number must ++ * always be valid, but its lower inode times don't have to ++ * be, so sync up the times only. ++ */ ++ if (IS_ROOT(dtmp)) { ++ unionfs_copy_attr_times(dtmp->d_inode); ++ } else { ++ /* ++ * reset generation number to zero, guaranteed to be ++ * "old" ++ */ ++ dgen = 0; ++ atomic_set(&UNIONFS_D(dtmp)->generation, dgen); ++ } ++ purge_inode_data(dtmp->d_inode); ++ } ++ if (dentry != dtmp) ++ unionfs_unlock_dentry(dtmp); ++ while (sbgen != dgen) { ++ /* The root entry should always be valid */ ++ BUG_ON(IS_ROOT(dtmp)); ++ chain_len++; ++ dtmp = dtmp->d_parent; ++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation); ++ } ++ if (chain_len == 0) ++ goto out_this; /* shortcut if parents are OK */ ++ ++ /* ++ * Allocate array of dentries to reval. We could use linked lists, ++ * but the number of entries we need to alloc here is often small, ++ * and short lived, so locality will be better. ++ */ ++ chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL); ++ if (unlikely(!chain)) { ++ printk(KERN_CRIT "unionfs: no more memory in %s\n", ++ __func__); ++ goto out; ++ } ++ ++ /* grab all dentries in chain, in child to parent order */ ++ dtmp = dentry; ++ for (i = chain_len-1; i >= 0; i--) ++ dtmp = chain[i] = dget_parent(dtmp); ++ ++ /* ++ * call __unionfs_d_revalidate_one() on each dentry, but in parent ++ * to child order. ++ */ ++ for (i = 0; i < chain_len; i++) { ++ unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD); ++ if (chain[i] != chain[i]->d_parent) ++ unionfs_lock_dentry(chain[i]->d_parent, ++ UNIONFS_DMUTEX_REVAL_PARENT); ++ saved_bstart = dbstart(chain[i]); ++ saved_bend = dbend(chain[i]); ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(chain[i])->generation); ++ ++ valid = __unionfs_d_revalidate_one(chain[i], nd); ++ /* XXX: is this the correct mntput condition?! */ ++ if (valid && chain_len > 0 && ++ sbgen != dgen && chain[i]->d_inode && ++ S_ISDIR(chain[i]->d_inode->i_mode)) { ++ for (bindex = saved_bstart; bindex <= saved_bend; ++ bindex++) ++ unionfs_mntput(chain[i], bindex); ++ } ++ if (chain[i] != chain[i]->d_parent) ++ unionfs_unlock_dentry(chain[i]->d_parent); ++ unionfs_unlock_dentry(chain[i]); ++ ++ if (unlikely(!valid)) ++ goto out_free; ++ } ++ ++ ++out_this: ++ /* finally, lock this dentry and revalidate it */ ++ verify_locked(dentry); /* verify child is locked */ ++ if (dentry != dentry->d_parent) ++ unionfs_lock_dentry(dentry->d_parent, ++ UNIONFS_DMUTEX_REVAL_PARENT); ++ valid = __unionfs_d_revalidate_one_locked(dentry, nd, willwrite); ++ if (dentry != dentry->d_parent) ++ unionfs_unlock_dentry(dentry->d_parent); ++ ++out_free: ++ /* unlock/dput all dentries in chain and return status */ ++ if (chain_len > 0) { ++ for (i = 0; i < chain_len; i++) ++ dput(chain[i]); ++ kfree(chain); ++ } ++out: ++ return valid; ++} ++ ++static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = __unionfs_d_revalidate_chain(dentry, nd, false); ++ if (likely(err > 0)) { /* true==1: dentry is valid */ ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_dentry(dentry); ++ unionfs_check_nd(nd); ++ } ++ unionfs_unlock_dentry(dentry); ++ ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++static void unionfs_d_release(struct dentry *dentry) ++{ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ if (unlikely(!UNIONFS_D(dentry))) ++ goto out; /* skip if no lower branches */ ++ /* must lock our branch configuration here */ ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ unionfs_check_dentry(dentry); ++ /* this could be a negative dentry, so check first */ ++ if (dbstart(dentry) < 0) { ++ unionfs_unlock_dentry(dentry); ++ goto out; /* due to a (normal) failed lookup */ ++ } ++ ++ /* Release all the lower dentries */ ++ path_put_lowers_all(dentry, true); ++ ++ unionfs_unlock_dentry(dentry); ++ ++out: ++ free_dentry_private_data(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return; ++} ++ ++/* ++ * Called when we're removing the last reference to our dentry. So we ++ * should drop all lower references too. ++ */ ++static void unionfs_d_iput(struct dentry *dentry, struct inode *inode) ++{ ++ int rc; ++ ++ BUG_ON(!dentry); ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0) ++ goto drop_lower_inodes; ++ path_put_lowers_all(dentry, false); ++ ++drop_lower_inodes: ++ rc = atomic_read(&inode->i_count); ++ if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(unionfs_lower_inode(inode)); ++ lockdep_on(); ++ unionfs_set_lower_inode(inode, NULL); ++ /* XXX: may need to set start/end to -1? */ ++ } ++ ++ iput(inode); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++} ++ ++struct dentry_operations unionfs_dops = { ++ .d_revalidate = unionfs_d_revalidate, ++ .d_release = unionfs_d_release, ++ .d_iput = unionfs_d_iput, ++}; +diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c +new file mode 100644 +index 0000000..14ca7d3 +--- /dev/null ++++ b/fs/unionfs/dirfops.c +@@ -0,0 +1,292 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* Make sure our rdstate is playing by the rules. */ ++static void verify_rdstate_offset(struct unionfs_dir_state *rdstate) ++{ ++ BUG_ON(rdstate->offset >= DIREOF); ++ BUG_ON(rdstate->cookie >= MAXRDCOOKIE); ++} ++ ++struct unionfs_getdents_callback { ++ struct unionfs_dir_state *rdstate; ++ void *dirent; ++ int entries_written; ++ int filldir_called; ++ int filldir_error; ++ filldir_t filldir; ++ struct super_block *sb; ++}; ++ ++/* based on generic filldir in fs/readir.c */ ++static int unionfs_filldir(void *dirent, const char *oname, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ struct unionfs_getdents_callback *buf = dirent; ++ struct filldir_node *found = NULL; ++ int err = 0; ++ int is_whiteout; ++ char *name = (char *) oname; ++ ++ buf->filldir_called++; ++ ++ is_whiteout = is_whiteout_name(&name, &namelen); ++ ++ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout); ++ ++ if (found) { ++ /* ++ * If we had non-whiteout entry in dir cache, then mark it ++ * as a whiteout and but leave it in the dir cache. ++ */ ++ if (is_whiteout && !found->whiteout) ++ found->whiteout = is_whiteout; ++ goto out; ++ } ++ ++ /* if 'name' isn't a whiteout, filldir it. */ ++ if (!is_whiteout) { ++ off_t pos = rdstate2offset(buf->rdstate); ++ u64 unionfs_ino = ino; ++ ++ err = buf->filldir(buf->dirent, name, namelen, pos, ++ unionfs_ino, d_type); ++ buf->rdstate->offset++; ++ verify_rdstate_offset(buf->rdstate); ++ } ++ /* ++ * If we did fill it, stuff it in our hash, otherwise return an ++ * error. ++ */ ++ if (err) { ++ buf->filldir_error = err; ++ goto out; ++ } ++ buf->entries_written++; ++ err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, is_whiteout); ++ if (err) ++ buf->filldir_error = err; ++ ++out: ++ return err; ++} ++ ++static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ struct inode *inode = NULL; ++ struct unionfs_getdents_callback buf; ++ struct unionfs_dir_state *uds; ++ int bend; ++ loff_t offset; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, false); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ ++ uds = UNIONFS_F(file)->rdstate; ++ if (!uds) { ++ if (file->f_pos == DIREOF) { ++ goto out; ++ } else if (file->f_pos > 0) { ++ uds = find_rdstate(inode, file->f_pos); ++ if (unlikely(!uds)) { ++ err = -ESTALE; ++ goto out; ++ } ++ UNIONFS_F(file)->rdstate = uds; ++ } else { ++ init_rdstate(file); ++ uds = UNIONFS_F(file)->rdstate; ++ } ++ } ++ bend = fbend(file); ++ ++ while (uds->bindex <= bend) { ++ lower_file = unionfs_lower_file_idx(file, uds->bindex); ++ if (!lower_file) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ continue; ++ } ++ ++ /* prepare callback buffer */ ++ buf.filldir_called = 0; ++ buf.filldir_error = 0; ++ buf.entries_written = 0; ++ buf.dirent = dirent; ++ buf.filldir = filldir; ++ buf.rdstate = uds; ++ buf.sb = inode->i_sb; ++ ++ /* Read starting from where we last left off. */ ++ offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ err = vfs_readdir(lower_file, unionfs_filldir, &buf); ++ ++ /* Save the position for when we continue. */ ++ offset = vfs_llseek(lower_file, 0, SEEK_CUR); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ uds->dirpos = offset; ++ ++ /* Copy the atime. */ ++ fsstack_copy_attr_atime(inode, ++ lower_file->f_path.dentry->d_inode); ++ ++ if (err < 0) ++ goto out; ++ ++ if (buf.filldir_error) ++ break; ++ ++ if (!buf.entries_written) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ } ++ } ++ ++ if (!buf.filldir_error && uds->bindex >= bend) { ++ /* Save the number of hash entries for next time. */ ++ UNIONFS_I(inode)->hashsize = uds->hashentries; ++ free_rdstate(uds); ++ UNIONFS_F(file)->rdstate = NULL; ++ file->f_pos = DIREOF; ++ } else { ++ file->f_pos = rdstate2offset(uds); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * This is not meant to be a generic repositioning function. If you do ++ * things that aren't supported, then we return EINVAL. ++ * ++ * What is allowed: ++ * (1) seeking to the same position that you are currently at ++ * This really has no effect, but returns where you are. ++ * (2) seeking to the beginning of the file ++ * This throws out all state, and lets you begin again. ++ */ ++static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin) ++{ ++ struct unionfs_dir_state *rdstate; ++ struct dentry *dentry = file->f_path.dentry; ++ loff_t err; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, false); ++ if (unlikely(err)) ++ goto out; ++ ++ rdstate = UNIONFS_F(file)->rdstate; ++ ++ /* ++ * we let users seek to their current position, but not anywhere ++ * else. ++ */ ++ if (!offset) { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ free_rdstate(rdstate); ++ UNIONFS_F(file)->rdstate = NULL; ++ } ++ init_rdstate(file); ++ err = 0; ++ break; ++ case SEEK_CUR: ++ err = file->f_pos; ++ break; ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } else { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ if (offset == rdstate2offset(rdstate)) ++ err = offset; ++ else if (file->f_pos == DIREOF) ++ err = DIREOF; ++ else ++ err = -EINVAL; ++ } else { ++ struct inode *inode; ++ inode = dentry->d_inode; ++ rdstate = find_rdstate(inode, offset); ++ if (rdstate) { ++ UNIONFS_F(file)->rdstate = rdstate; ++ err = rdstate->offset; ++ } else { ++ err = -EINVAL; ++ } ++ } ++ break; ++ case SEEK_CUR: ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * Trimmed directory options, we shouldn't pass everything down since ++ * we don't want to operate on partial directories. ++ */ ++struct file_operations unionfs_dir_fops = { ++ .llseek = unionfs_dir_llseek, ++ .read = generic_read_dir, ++ .readdir = unionfs_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++ .open = unionfs_open, ++ .release = unionfs_file_release, ++ .flush = unionfs_flush, ++ .fsync = unionfs_fsync, ++ .fasync = unionfs_fasync, ++}; +diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c +new file mode 100644 +index 0000000..d936f03 +--- /dev/null ++++ b/fs/unionfs/dirhelper.c +@@ -0,0 +1,157 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++#define RD_NONE 0 ++#define RD_CHECK_EMPTY 1 ++/* The callback structure for check_empty. */ ++struct unionfs_rdutil_callback { ++ int err; ++ int filldir_called; ++ struct unionfs_dir_state *rdstate; ++ int mode; ++}; ++ ++/* This filldir function makes sure only whiteouts exist within a directory. */ ++static int readdir_util_callback(void *dirent, const char *oname, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ int err = 0; ++ struct unionfs_rdutil_callback *buf = dirent; ++ int is_whiteout; ++ struct filldir_node *found; ++ char *name = (char *) oname; ++ ++ buf->filldir_called = 1; ++ ++ if (name[0] == '.' && (namelen == 1 || ++ (name[1] == '.' && namelen == 2))) ++ goto out; ++ ++ is_whiteout = is_whiteout_name(&name, &namelen); ++ ++ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout); ++ /* If it was found in the table there was a previous whiteout. */ ++ if (found) ++ goto out; ++ ++ /* ++ * if it wasn't found and isn't a whiteout, the directory isn't ++ * empty. ++ */ ++ err = -ENOTEMPTY; ++ if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout) ++ goto out; ++ ++ err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, is_whiteout); ++ ++out: ++ buf->err = err; ++ return err; ++} ++ ++/* Is a directory logically empty? */ ++int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct vfsmount *mnt; ++ struct super_block *sb; ++ struct file *lower_file; ++ struct unionfs_rdutil_callback *buf = NULL; ++ int bindex, bstart, bend, bopaque; ++ ++ sb = dentry->d_sb; ++ ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ err = unionfs_partial_lookup(dentry); ++ if (err) ++ goto out; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ bopaque = dbopaque(dentry); ++ if (0 <= bopaque && bopaque < bend) ++ bend = bopaque; ++ ++ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ buf->err = 0; ++ buf->mode = RD_CHECK_EMPTY; ++ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart); ++ if (unlikely(!buf->rdstate)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* Process the lower directories with rdutil_callback as a filldir. */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (!lower_dentry->d_inode) ++ continue; ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ continue; ++ ++ dget(lower_dentry); ++ mnt = unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ lower_file = dentry_open(lower_dentry, mnt, O_RDONLY); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ branchput(sb, bindex); ++ goto out; ++ } ++ ++ do { ++ buf->filldir_called = 0; ++ buf->rdstate->bindex = bindex; ++ err = vfs_readdir(lower_file, ++ readdir_util_callback, buf); ++ if (buf->err) ++ err = buf->err; ++ } while ((err >= 0) && buf->filldir_called); ++ ++ /* fput calls dput for lower_dentry */ ++ fput(lower_file); ++ branchput(sb, bindex); ++ ++ if (err < 0) ++ goto out; ++ } ++ ++out: ++ if (buf) { ++ if (namelist && !err) ++ *namelist = buf->rdstate; ++ else if (buf->rdstate) ++ free_rdstate(buf->rdstate); ++ kfree(buf); ++ } ++ ++ ++ return err; ++} +diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h +new file mode 100644 +index 0000000..4f264de +--- /dev/null ++++ b/fs/unionfs/fanout.h +@@ -0,0 +1,384 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _FANOUT_H_ ++#define _FANOUT_H_ ++ ++/* ++ * Inode to private data ++ * ++ * Since we use containers and the struct inode is _inside_ the ++ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL ++ * inode pointer), return a valid non-NULL pointer. ++ */ ++static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct unionfs_inode_info, vfs_inode); ++} ++ ++#define ibstart(ino) (UNIONFS_I(ino)->bstart) ++#define ibend(ino) (UNIONFS_I(ino)->bend) ++ ++/* Dentry to private data */ ++#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata) ++#define dbstart(dent) (UNIONFS_D(dent)->bstart) ++#define dbend(dent) (UNIONFS_D(dent)->bend) ++#define dbopaque(dent) (UNIONFS_D(dent)->bopaque) ++ ++/* Superblock to private data */ ++#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info) ++#define sbstart(sb) 0 ++#define sbend(sb) (UNIONFS_SB(sb)->bend) ++#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1) ++#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id) ++ ++/* File to private Data */ ++#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data)) ++#define fbstart(file) (UNIONFS_F(file)->bstart) ++#define fbend(file) (UNIONFS_F(file)->bend) ++ ++/* macros to manipulate branch IDs in stored in our superblock */ ++static inline int branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].branch_id; ++} ++ ++static inline void set_branch_id(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].branch_id = val; ++} ++ ++static inline void new_branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id); ++} ++ ++/* ++ * Find new index of matching branch with an existing superblock of a known ++ * (possibly old) id. This is needed because branches could have been ++ * added/deleted causing the branches of any open files to shift. ++ * ++ * @sb: the new superblock which may have new/different branch IDs ++ * @id: the old/existing id we're looking for ++ * Returns index of newly found branch (0 or greater), -1 otherwise. ++ */ ++static inline int branch_id_to_idx(struct super_block *sb, int id) ++{ ++ int i; ++ for (i = 0; i < sbmax(sb); i++) { ++ if (branch_id(sb, i) == id) ++ return i; ++ } ++ /* in the non-ODF code, this should really never happen */ ++ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id); ++ return -1; ++} ++ ++/* File to lower file. */ ++static inline struct file *unionfs_lower_file(const struct file *f) ++{ ++ BUG_ON(!f); ++ return UNIONFS_F(f)->lower_files[fbstart(f)]; ++} ++ ++static inline struct file *unionfs_lower_file_idx(const struct file *f, ++ int index) ++{ ++ BUG_ON(!f || index < 0); ++ return UNIONFS_F(f)->lower_files[index]; ++} ++ ++static inline void unionfs_set_lower_file_idx(struct file *f, int index, ++ struct file *val) ++{ ++ BUG_ON(!f || index < 0); ++ UNIONFS_F(f)->lower_files[index] = val; ++ /* save branch ID (may be redundant?) */ ++ UNIONFS_F(f)->saved_branch_ids[index] = ++ branch_id((f)->f_path.dentry->d_sb, index); ++} ++ ++static inline void unionfs_set_lower_file(struct file *f, struct file *val) ++{ ++ BUG_ON(!f); ++ unionfs_set_lower_file_idx((f), fbstart(f), (val)); ++} ++ ++/* Inode to lower inode. */ ++static inline struct inode *unionfs_lower_inode(const struct inode *i) ++{ ++ BUG_ON(!i); ++ return UNIONFS_I(i)->lower_inodes[ibstart(i)]; ++} ++ ++static inline struct inode *unionfs_lower_inode_idx(const struct inode *i, ++ int index) ++{ ++ BUG_ON(!i || index < 0); ++ return UNIONFS_I(i)->lower_inodes[index]; ++} ++ ++static inline void unionfs_set_lower_inode_idx(struct inode *i, int index, ++ struct inode *val) ++{ ++ BUG_ON(!i || index < 0); ++ UNIONFS_I(i)->lower_inodes[index] = val; ++} ++ ++static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val) ++{ ++ BUG_ON(!i); ++ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val; ++} ++ ++/* Superblock to lower superblock. */ ++static inline struct super_block *unionfs_lower_super( ++ const struct super_block *sb) ++{ ++ BUG_ON(!sb); ++ return UNIONFS_SB(sb)->data[sbstart(sb)].sb; ++} ++ ++static inline struct super_block *unionfs_lower_super_idx( ++ const struct super_block *sb, ++ int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].sb; ++} ++ ++static inline void unionfs_set_lower_super_idx(struct super_block *sb, ++ int index, ++ struct super_block *val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].sb = val; ++} ++ ++static inline void unionfs_set_lower_super(struct super_block *sb, ++ struct super_block *val) ++{ ++ BUG_ON(!sb); ++ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val; ++} ++ ++/* Branch count macros. */ ++static inline int branch_count(const struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void set_branch_count(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val); ++} ++ ++static inline void branchget(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void branchput(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++/* Dentry macros */ ++static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index, ++ struct dentry *val) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].dentry = val; ++} ++ ++static inline struct dentry *unionfs_lower_dentry_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].dentry; ++} ++ ++static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return unionfs_lower_dentry_idx(dent, dbstart(dent)); ++} ++ ++static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index, ++ struct vfsmount *mnt) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].mnt = mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return unionfs_lower_mnt_idx(dent, dbstart(dent)); ++} ++ ++/* Macros for locking a dentry. */ ++enum unionfs_dentry_lock_class { ++ UNIONFS_DMUTEX_NORMAL, ++ UNIONFS_DMUTEX_ROOT, ++ UNIONFS_DMUTEX_PARENT, ++ UNIONFS_DMUTEX_CHILD, ++ UNIONFS_DMUTEX_WHITEOUT, ++ UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */ ++ UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */ ++}; ++ ++static inline void unionfs_lock_dentry(struct dentry *d, ++ unsigned int subclass) ++{ ++ BUG_ON(!d); ++ mutex_lock_nested(&UNIONFS_D(d)->lock, subclass); ++} ++ ++static inline void unionfs_unlock_dentry(struct dentry *d) ++{ ++ BUG_ON(!d); ++ mutex_unlock(&UNIONFS_D(d)->lock); ++} ++ ++static inline void verify_locked(struct dentry *d) ++{ ++ BUG_ON(!d); ++ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock)); ++} ++ ++/* macros to put lower objects */ ++ ++/* ++ * iput lower inodes of an unionfs dentry, from bstart to bend. If ++ * @free_lower is true, then also kfree the memory used to hold the lower ++ * object pointers. ++ */ ++static inline void iput_lowers(struct inode *inode, ++ int bstart, int bend, bool free_lower) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ BUG_ON(!inode); ++ BUG_ON(!UNIONFS_I(inode)); ++ BUG_ON(bstart < 0); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(lower_inode); ++ lockdep_on(); ++ } ++ } ++ ++ if (free_lower) { ++ kfree(UNIONFS_I(inode)->lower_inodes); ++ UNIONFS_I(inode)->lower_inodes = NULL; ++ } ++} ++ ++/* iput all lower inodes, and reset start/end branch indices to -1 */ ++static inline void iput_lowers_all(struct inode *inode, bool free_lower) ++{ ++ int bstart, bend; ++ ++ BUG_ON(!inode); ++ BUG_ON(!UNIONFS_I(inode)); ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ BUG_ON(bstart < 0); ++ ++ iput_lowers(inode, bstart, bend, free_lower); ++ ibstart(inode) = ibend(inode) = -1; ++} ++ ++/* ++ * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from ++ * bstart to bend. If @free_lower is true, then also kfree the memory used ++ * to hold the lower object pointers. ++ * ++ * XXX: implement using path_put VFS macros ++ */ ++static inline void path_put_lowers(struct dentry *dentry, ++ int bstart, int bend, bool free_lower) ++{ ++ struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; ++ int bindex; ++ ++ BUG_ON(!dentry); ++ BUG_ON(!UNIONFS_D(dentry)); ++ BUG_ON(bstart < 0); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (lower_dentry) { ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ dput(lower_dentry); ++ } ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (lower_mnt) { ++ unionfs_set_lower_mnt_idx(dentry, bindex, NULL); ++ mntput(lower_mnt); ++ } ++ } ++ ++ if (free_lower) { ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ } ++} ++ ++/* ++ * dput/mntput all lower dentries and vfsmounts, and reset start/end branch ++ * indices to -1. ++ */ ++static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower) ++{ ++ int bstart, bend; ++ ++ BUG_ON(!dentry); ++ BUG_ON(!UNIONFS_D(dentry)); ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ BUG_ON(bstart < 0); ++ ++ path_put_lowers(dentry, bstart, bend, free_lower); ++ dbstart(dentry) = dbend(dentry) = -1; ++} ++ ++#endif /* not _FANOUT_H */ +diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c +new file mode 100644 +index 0000000..965d071 +--- /dev/null ++++ b/fs/unionfs/file.c +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++static ssize_t unionfs_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = unionfs_file_revalidate(file, false); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_read(lower_file, buf, count, ppos); ++ /* update our inode atime upon a successful lower read */ ++ if (err >= 0) { ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ if (dentry != dentry->d_parent) ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ err = unionfs_file_revalidate_locked(file, true); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_write(lower_file, buf, count, ppos); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */ ++ unionfs_check_file(file); ++ } ++ ++out: ++ if (dentry != dentry->d_parent) ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_file_readdir(struct file *file, void *dirent, ++ filldir_t filldir) ++{ ++ return -ENOTDIR; ++} ++ ++static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err = 0; ++ bool willwrite; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct vm_operations_struct *saved_vm_ops = NULL; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* This might be deferred to mmap's writepage */ ++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); ++ err = unionfs_file_revalidate(file, willwrite); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ /* ++ * File systems which do not implement ->writepage may use ++ * generic_file_readonly_mmap as their ->mmap op. If you call ++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. ++ * But we cannot call the lower ->mmap op, so we can't tell that ++ * writeable mappings won't work. Therefore, our only choice is to ++ * check if the lower file system supports the ->writepage, and if ++ * not, return EINVAL (the same error that ++ * generic_file_readonly_mmap returns in that case). ++ */ ++ lower_file = unionfs_lower_file(file); ++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) { ++ err = -EINVAL; ++ printk(KERN_ERR "unionfs: branch %d file system does not " ++ "support writeable mmap\n", fbstart(file)); ++ goto out; ++ } ++ ++ /* ++ * find and save lower vm_ops. ++ * ++ * XXX: the VFS should have a cleaner way of finding the lower vm_ops ++ */ ++ if (!UNIONFS_F(file)->lower_vm_ops) { ++ err = lower_file->f_op->mmap(lower_file, vma); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower mmap failed %d\n", err); ++ goto out; ++ } ++ saved_vm_ops = vma->vm_ops; ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (err) { ++ printk(KERN_ERR "unionfs: do_munmap failed %d\n", err); ++ goto out; ++ } ++ } ++ ++ file->f_mapping->a_ops = &unionfs_dummy_aops; ++ err = generic_file_mmap(file, vma); ++ file->f_mapping->a_ops = &unionfs_aops; ++ if (err) { ++ printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err); ++ goto out; ++ } ++ vma->vm_ops = &unionfs_vm_ops; ++ if (!UNIONFS_F(file)->lower_vm_ops) ++ UNIONFS_F(file)->lower_vm_ops = saved_vm_ops; ++ ++out: ++ if (!err) { ++ /* copyup could cause parent dir times to change */ ++ unionfs_copy_attr_times(dentry->d_parent->d_inode); ++ unionfs_check_file(file); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync) ++{ ++ int bindex, bstart, bend; ++ struct file *lower_file; ++ struct dentry *lower_dentry; ++ struct inode *lower_inode, *inode; ++ int err = -EINVAL; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = unionfs_file_revalidate(file, true); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ if (bstart < 0 || bend < 0) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) { ++ printk(KERN_ERR ++ "unionfs: null lower inode in unionfs_fsync\n"); ++ goto out; ++ } ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode || !lower_inode->i_fop->fsync) ++ continue; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ mutex_lock(&lower_inode->i_mutex); ++ err = lower_inode->i_fop->fsync(lower_file, ++ lower_dentry, ++ datasync); ++ if (!err && bindex == bstart) ++ fsstack_copy_attr_times(inode, lower_inode); ++ mutex_unlock(&lower_inode->i_mutex); ++ if (err) ++ goto out; ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++int unionfs_fasync(int fd, struct file *file, int flag) ++{ ++ int bindex, bstart, bend; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct inode *lower_inode, *inode; ++ int err = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = unionfs_file_revalidate(file, true); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ if (bstart < 0 || bend < 0) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) { ++ printk(KERN_ERR ++ "unionfs: null lower inode in unionfs_fasync\n"); ++ goto out; ++ } ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode || !lower_inode->i_fop->fasync) ++ continue; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ mutex_lock(&lower_inode->i_mutex); ++ err = lower_inode->i_fop->fasync(fd, lower_file, flag); ++ if (!err && bindex == bstart) ++ fsstack_copy_attr_times(inode, lower_inode); ++ mutex_unlock(&lower_inode->i_mutex); ++ if (err) ++ goto out; ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ ssize_t err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = unionfs_file_revalidate(file, false); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_splice_to(lower_file, ppos, pipe, len, flags); ++ /* update our inode atime upon a successful lower splice-read */ ++ if (err >= 0) { ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe, ++ struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ err = unionfs_file_revalidate(file, true); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_splice_from(pipe, lower_file, ppos, len, flags); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++struct file_operations unionfs_main_fops = { ++ .llseek = generic_file_llseek, ++ .read = unionfs_read, ++ .write = unionfs_write, ++ .readdir = unionfs_file_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++ .mmap = unionfs_mmap, ++ .open = unionfs_open, ++ .flush = unionfs_flush, ++ .release = unionfs_file_release, ++ .fsync = unionfs_fsync, ++ .fasync = unionfs_fasync, ++ .splice_read = unionfs_splice_read, ++ .splice_write = unionfs_splice_write, ++}; +diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c +new file mode 100644 +index 0000000..0bd9fab +--- /dev/null ++++ b/fs/unionfs/inode.c +@@ -0,0 +1,984 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Find a writeable branch to create new object in. Checks all writeble ++ * branches of the parent inode, from istart to iend order; if none are ++ * suitable, also tries branch 0 (which may require a copyup). ++ * ++ * Return a lower_dentry we can use to create object in, or ERR_PTR. ++ */ ++static struct dentry *find_writeable_branch(struct inode *parent, ++ struct dentry *dentry) ++{ ++ int err = -EINVAL; ++ int bindex, istart, iend; ++ struct dentry *lower_dentry = NULL; ++ ++ istart = ibstart(parent); ++ iend = ibend(parent); ++ if (istart < 0) ++ goto out; ++ ++begin: ++ for (bindex = istart; bindex <= iend; bindex++) { ++ /* skip non-writeable branches */ ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (err) { ++ err = -EROFS; ++ continue; ++ } ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ /* ++ * check for whiteouts in writeable branch, and remove them ++ * if necessary. ++ */ ++ err = check_unlink_whiteout(dentry, lower_dentry, bindex); ++ if (err > 0) /* ignore if whiteout found and removed */ ++ err = 0; ++ if (err) ++ continue; ++ /* if get here, we can write to the branch */ ++ break; ++ } ++ /* ++ * If istart wasn't already branch 0, and we got any error, then try ++ * branch 0 (which may require copyup) ++ */ ++ if (err && istart > 0) { ++ istart = iend = 0; ++ goto begin; ++ } ++ ++ /* ++ * If we tried even branch 0, and still got an error, abort. But if ++ * the error was an EROFS, then we should try to copyup. ++ */ ++ if (err && err != -EROFS) ++ goto out; ++ ++ /* ++ * If we get here, then check if copyup needed. If lower_dentry is ++ * NULL, create the entire dentry directory structure in branch 0. ++ */ ++ if (!lower_dentry) { ++ bindex = 0; ++ lower_dentry = create_parents(parent, dentry, ++ dentry->d_name.name, bindex); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ } ++ err = 0; /* all's well */ ++out: ++ if (err) ++ return ERR_PTR(err); ++ return lower_dentry; ++} ++ ++static int unionfs_create(struct inode *parent, struct dentry *dentry, ++ int mode, struct nameidata *nd) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ int valid = 0; ++ struct nameidata lower_nd; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ ++ valid = __unionfs_d_revalidate_one_locked(dentry, nd, false); ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ lower_dentry = find_writeable_branch(parent, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out; ++ } ++ ++ err = init_lower_nd(&lower_nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, ++ &lower_nd); ++ release_lower_nd(&lower_nd, err); ++ ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(parent); ++ fsstack_copy_inode_size(parent, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ } ++ ++ unlock_dir(lower_parent_dentry); ++ ++out: ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(parent); ++ unionfs_check_dentry(dentry); ++ unionfs_check_nd(nd); ++ } ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * unionfs_lookup is the only special function which takes a dentry, yet we ++ * do NOT want to call __unionfs_d_revalidate_chain because by definition, ++ * we don't have a valid dentry here yet. ++ */ ++static struct dentry *unionfs_lookup(struct inode *parent, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct path path_save = {NULL, NULL}; ++ struct dentry *ret; ++ int err = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ if (dentry != dentry->d_parent) ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_ROOT); ++ ++ /* save the dentry & vfsmnt from namei */ ++ if (nd) { ++ path_save.dentry = nd->path.dentry; ++ path_save.mnt = nd->path.mnt; ++ } ++ ++ /* ++ * unionfs_lookup_backend returns a locked dentry upon success, ++ * so we'll have to unlock it below. ++ */ ++ ++ /* allocate dentry private data. We free it in ->d_release */ ++ err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD); ++ if (unlikely(err)) { ++ ret = ERR_PTR(err); ++ goto out; ++ } ++ ret = unionfs_lookup_full(dentry, nd, INTERPOSE_LOOKUP); ++ ++ /* restore the dentry & vfsmnt in namei */ ++ if (nd) { ++ nd->path.dentry = path_save.dentry; ++ nd->path.mnt = path_save.mnt; ++ } ++ if (!IS_ERR(ret)) { ++ if (ret) ++ dentry = ret; ++ /* lookup_full can return multiple positive dentries */ ++ if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) { ++ BUG_ON(dbstart(dentry) < 0); ++ unionfs_postcopyup_release(dentry); ++ } ++ unionfs_copy_attr_times(dentry->d_inode); ++ /* parent times may have changed */ ++ unionfs_copy_attr_times(dentry->d_parent->d_inode); ++ } ++ ++ unionfs_check_inode(parent); ++ if (!IS_ERR(ret)) { ++ unionfs_check_dentry(dentry); ++ unionfs_check_nd(nd); ++ } ++ unionfs_unlock_dentry(dentry); ++ ++out: ++ if (dentry != dentry->d_parent) { ++ unionfs_check_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry->d_parent); ++ } ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return ret; ++} ++ ++static int unionfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry = NULL; ++ struct dentry *lower_new_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ char *name = NULL; ++ ++ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_double_lock_dentry(new_dentry, old_dentry); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (unlikely(new_dentry->d_inode && ++ !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_new_dentry = unionfs_lower_dentry(new_dentry); ++ ++ /* check for a whiteout in new dentry branch, and delete it */ ++ err = check_unlink_whiteout(new_dentry, lower_new_dentry, ++ dbstart(new_dentry)); ++ if (err > 0) { /* whiteout found and removed successfully */ ++ lower_dir_dentry = dget_parent(lower_new_dentry); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ dput(lower_dir_dentry); ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ err = 0; ++ } ++ if (err) ++ goto out; ++ ++ /* check if parent hierachy is needed, then link in same branch */ ++ if (dbstart(old_dentry) != dbstart(new_dentry)) { ++ lower_new_dentry = create_parents(dir, new_dentry, ++ new_dentry->d_name.name, ++ dbstart(old_dentry)); ++ err = PTR_ERR(lower_new_dentry); ++ if (IS_COPYUP_ERR(err)) ++ goto docopyup; ++ if (!lower_new_dentry || IS_ERR(lower_new_dentry)) ++ goto out; ++ } ++ lower_new_dentry = unionfs_lower_dentry(new_dentry); ++ lower_old_dentry = unionfs_lower_dentry(old_dentry); ++ ++ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry)); ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ err = is_robranch(old_dentry); ++ if (!err) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ lockdep_on(); ++ } ++ unlock_dir(lower_dir_dentry); ++ ++docopyup: ++ if (IS_COPYUP_ERR(err)) { ++ int old_bstart = dbstart(old_dentry); ++ int bindex; ++ ++ for (bindex = old_bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_dentry(old_dentry->d_parent->d_inode, ++ old_dentry, old_bstart, ++ bindex, old_dentry->d_name.name, ++ old_dentry->d_name.len, NULL, ++ i_size_read(old_dentry->d_inode)); ++ if (err) ++ continue; ++ lower_new_dentry = ++ create_parents(dir, new_dentry, ++ new_dentry->d_name.name, ++ bindex); ++ lower_old_dentry = unionfs_lower_dentry(old_dentry); ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ /* do vfs_link */ ++ err = vfs_link(lower_old_dentry, ++ lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ lockdep_on(); ++ unlock_dir(lower_dir_dentry); ++ goto check_link; ++ } ++ goto out; ++ } ++ ++check_link: ++ if (err || !lower_new_dentry->d_inode) ++ goto out; ++ ++ /* Its a hard link, so use the same inode */ ++ new_dentry->d_inode = igrab(old_dentry->d_inode); ++ d_add(new_dentry, new_dentry->d_inode); ++ unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode); ++ fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode); ++ ++ /* propagate number of hard-links */ ++ old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode); ++ /* new dentry's ctime may have changed due to hard-link counts */ ++ unionfs_copy_attr_times(new_dentry->d_inode); ++ ++out: ++ if (!new_dentry->d_inode) ++ d_drop(new_dentry); ++ ++ kfree(name); ++ if (!err) ++ unionfs_postcopyup_setmnt(new_dentry); ++ ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(new_dentry); ++ unionfs_check_dentry(old_dentry); ++ ++ unionfs_unlock_dentry(new_dentry); ++ unionfs_unlock_dentry(old_dentry); ++ unionfs_read_unlock(old_dentry->d_sb); ++ ++ return err; ++} ++ ++static int unionfs_symlink(struct inode *parent, struct dentry *dentry, ++ const char *symname) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ char *name = NULL; ++ int valid = 0; ++ umode_t mode; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (unlikely(dentry->d_inode && ++ !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ lower_dentry = find_writeable_branch(parent, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out; ++ } ++ ++ mode = S_IALLUGO; ++ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(parent); ++ fsstack_copy_inode_size(parent, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ } ++ ++ unlock_dir(lower_parent_dentry); ++ ++out: ++ dput(wh_dentry); ++ kfree(name); ++ ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(parent); ++ unionfs_check_dentry(dentry); ++ } ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ int valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ if (unlikely(dentry->d_inode && ++ !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ /* check for a whiteout in new dentry branch, and delete it */ ++ err = check_unlink_whiteout(dentry, lower_dentry, bstart); ++ if (err > 0) /* whiteout found and removed successfully */ ++ err = 0; ++ if (err) { ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ bstart--; ++ } ++ ++ /* check if copyup's needed, and mkdir */ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ int i; ++ int bend = dbend(dentry); ++ ++ if (is_robranch_super(dentry->d_sb, bindex)) ++ continue; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) { ++ lower_dentry = create_parents(parent, dentry, ++ dentry->d_name.name, ++ bindex); ++ if (!lower_dentry || IS_ERR(lower_dentry)) { ++ printk(KERN_ERR "unionfs: lower dentry " ++ " NULL for bindex = %d\n", bindex); ++ continue; ++ } ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out; ++ } ++ ++ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, ++ mode); ++ ++ unlock_dir(lower_parent_dentry); ++ ++ /* did the mkdir succeed? */ ++ if (err) ++ break; ++ ++ for (i = bindex + 1; i < bend; i++) { ++ if (unionfs_lower_dentry_idx(dentry, i)) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ } ++ } ++ dbend(dentry) = bindex; ++ ++ /* ++ * Only INTERPOSE_LOOKUP can return a value other than 0 on ++ * err. ++ */ ++ err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(parent); ++ fsstack_copy_inode_size(parent, ++ lower_parent_dentry->d_inode); ++ ++ /* update number of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ ++ err = make_dir_opaque(dentry, dbstart(dentry)); ++ if (err) { ++ printk(KERN_ERR "unionfs: mkdir: error creating " ++ ".wh.__dir_opaque: %d\n", err); ++ goto out; ++ } ++ ++ /* we are done! */ ++ break; ++ } ++ ++out: ++ if (!dentry->d_inode) ++ d_drop(dentry); ++ ++ kfree(name); ++ ++ if (!err) { ++ unionfs_copy_attr_times(dentry->d_inode); ++ unionfs_postcopyup_setmnt(dentry); ++ } ++ unionfs_check_inode(parent); ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++static int unionfs_mknod(struct inode *parent, struct dentry *dentry, int mode, ++ dev_t dev) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ char *name = NULL; ++ int valid = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (unlikely(dentry->d_inode && ++ !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ lower_dentry = find_writeable_branch(parent, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out; ++ } ++ ++ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(parent); ++ fsstack_copy_inode_size(parent, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ } ++ ++ unlock_dir(lower_parent_dentry); ++ ++out: ++ dput(wh_dentry); ++ kfree(name); ++ ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(parent); ++ unionfs_check_dentry(dentry); ++ } ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_readlink(struct dentry *dentry, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ if (!lower_dentry->d_inode->i_op || ++ !lower_dentry->d_inode->i_op->readlink) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = lower_dentry->d_inode->i_op->readlink(lower_dentry, ++ buf, bufsiz); ++ if (err > 0) ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_dentry->d_inode); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++/* ++ * unionfs_follow_link takes a dentry, but it is simple. It only needs to ++ * allocate some memory and then call our ->readlink method. Our ++ * unionfs_readlink *does* lock our dentry and revalidate the dentry. ++ * Therefore, we do not have to lock our dentry here, to prevent a deadlock; ++ * nor do we need to revalidate it either. It is safe to not lock our ++ * dentry here, nor revalidate it, because unionfs_follow_link does not do ++ * anything (prior to calling ->readlink) which could become inconsistent ++ * due to branch management. We also don't need to lock our super because ++ * this function isn't affected by branch-management. ++ */ ++static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ char *buf; ++ int len = PAGE_SIZE, err; ++ mm_segment_t old_fs; ++ ++ /* This is freed by the put_link method assuming a successful call. */ ++ buf = kmalloc(len, GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* read the symlink, and then we will follow it */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); ++ set_fs(old_fs); ++ if (err < 0) { ++ kfree(buf); ++ buf = NULL; ++ goto out; ++ } ++ buf[err] = 0; ++ nd_set_link(nd, buf); ++ err = 0; ++ ++out: ++ if (!err) { ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ } ++ unionfs_check_nd(nd); ++ return ERR_PTR(err); ++} ++ ++/* FIXME: We may not have to lock here */ ++static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, nd, false))) ++ printk(KERN_ERR ++ "unionfs: put_link failed to revalidate dentry\n"); ++ ++ unionfs_check_dentry(dentry); ++ unionfs_check_nd(nd); ++ kfree(nd_get_link(nd)); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++} ++ ++/* ++ * Don't grab the superblock read-lock in unionfs_permission, which prevents ++ * a deadlock with the branch-management "add branch" code (which grabbed ++ * the write lock). It is safe to not grab the read lock here, because even ++ * with branch management taking place, there is no chance that ++ * unionfs_permission, or anything it calls, will use stale branch ++ * information. ++ */ ++static int unionfs_permission(struct inode *inode, int mask) ++{ ++ struct inode *lower_inode = NULL; ++ int err = 0; ++ int bindex, bstart, bend; ++ const int is_file = !S_ISDIR(inode->i_mode); ++ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); ++ ++ if (!UNIONFS_I(inode)->lower_inodes) { ++ if (is_file) /* dirs can be unlinked but chdir'ed to */ ++ err = -ESTALE; /* force revalidate */ ++ goto out; ++ } ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (unlikely(bstart < 0 || bend < 0)) { ++ /* ++ * With branch-management, we can get a stale inode here. ++ * If so, we return ESTALE back to link_path_walk, which ++ * would discard the dcache entry and re-lookup the ++ * dentry+inode. This should be equivalent to issuing ++ * __unionfs_d_revalidate_chain on nd.dentry here. ++ */ ++ if (is_file) /* dirs can be unlinked but chdir'ed to */ ++ err = -ESTALE; /* force revalidate */ ++ goto out; ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ /* ++ * check the condition for D-F-D underlying files/directories, ++ * we don't have to check for files, if we are checking for ++ * directories. ++ */ ++ if (!is_file && !S_ISDIR(lower_inode->i_mode)) ++ continue; ++ ++ /* ++ * We check basic permissions, but we ignore any conditions ++ * such as readonly file systems or branches marked as ++ * readonly, because those conditions should lead to a ++ * copyup taking place later on. ++ */ ++ err = inode_permission(lower_inode, mask); ++ if (err && bindex > 0) { ++ umode_t mode = lower_inode->i_mode; ++ if (is_robranch_super(inode->i_sb, bindex) && ++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) ++ err = 0; ++ if (IS_COPYUP_ERR(err)) ++ err = 0; ++ } ++ ++ /* ++ * The permissions are an intersection of the overall directory ++ * permissions, so we fail if one fails. ++ */ ++ if (err) ++ goto out; ++ ++ /* only the leftmost file matters. */ ++ if (is_file || write_mask) { ++ if (is_file && write_mask) { ++ err = get_write_access(lower_inode); ++ if (!err) ++ put_write_access(lower_inode); ++ } ++ break; ++ } ++ } ++ /* sync times which may have changed (asynchronously) below */ ++ unionfs_copy_attr_times(inode); ++ ++out: ++ unionfs_check_inode(inode); ++ return err; ++} ++ ++static int unionfs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct inode *inode; ++ struct inode *lower_inode; ++ int bstart, bend, bindex; ++ loff_t size; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ inode = dentry->d_inode; ++ ++ /* ++ * mode change is for clearing setuid/setgid. Allow lower filesystem ++ * to reinterpret it in its own way. ++ */ ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ BUG_ON(!lower_dentry); /* should never happen after above revalidate */ ++ ++ /* copyup if the file is on a read only branch */ ++ if (is_robranch_super(dentry->d_sb, bstart) ++ || IS_RDONLY(lower_dentry->d_inode)) { ++ /* check if we have a branch to copy up to */ ++ if (bstart <= 0) { ++ err = -EACCES; ++ goto out; ++ } ++ ++ if (ia->ia_valid & ATTR_SIZE) ++ size = ia->ia_size; ++ else ++ size = i_size_read(inode); ++ /* copyup to next available branch */ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_dentry(dentry->d_parent->d_inode, ++ dentry, bstart, bindex, ++ dentry->d_name.name, ++ dentry->d_name.len, ++ NULL, size); ++ if (!err) ++ break; ++ } ++ if (err) ++ goto out; ++ /* get updated lower_dentry after copyup */ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ } ++ ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* ++ * If shrinking, first truncate upper level to cancel writing dirty ++ * pages beyond the new eof; and also if its' maxbytes is more ++ * limiting (fail with -EFBIG before making any change to the lower ++ * level). There is no need to vmtruncate the upper level ++ * afterwards in the other cases: we fsstack_copy_inode_size from ++ * the lower level. ++ */ ++ if (ia->ia_valid & ATTR_SIZE) { ++ size = i_size_read(inode); ++ if (ia->ia_size < size || (ia->ia_size > size && ++ inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) { ++ err = vmtruncate(inode, ia->ia_size); ++ if (err) ++ goto out; ++ } ++ } ++ ++ /* notify the (possibly copied-up) lower inode */ ++ mutex_lock(&lower_dentry->d_inode->i_mutex); ++ err = notify_change(lower_dentry, ia); ++ mutex_unlock(&lower_dentry->d_inode->i_mutex); ++ if (err) ++ goto out; ++ ++ /* get attributes from the first lower inode */ ++ unionfs_copy_attr_all(inode, lower_inode); ++ /* ++ * unionfs_copy_attr_all will copy the lower times to our inode if ++ * the lower ones are newer (useful for cache coherency). However, ++ * ->setattr is the only place in which we may have to copy the ++ * lower inode times absolutely, to support utimes(2). ++ */ ++ if (ia->ia_valid & ATTR_MTIME_SET) ++ inode->i_mtime = lower_inode->i_mtime; ++ if (ia->ia_valid & ATTR_CTIME) ++ inode->i_ctime = lower_inode->i_ctime; ++ if (ia->ia_valid & ATTR_ATIME_SET) ++ inode->i_atime = lower_inode->i_atime; ++ fsstack_copy_inode_size(inode, lower_inode); ++ ++out: ++ if (!err) ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++struct inode_operations unionfs_symlink_iops = { ++ .readlink = unionfs_readlink, ++ .permission = unionfs_permission, ++ .follow_link = unionfs_follow_link, ++ .setattr = unionfs_setattr, ++ .put_link = unionfs_put_link, ++}; ++ ++struct inode_operations unionfs_dir_iops = { ++ .create = unionfs_create, ++ .lookup = unionfs_lookup, ++ .link = unionfs_link, ++ .unlink = unionfs_unlink, ++ .symlink = unionfs_symlink, ++ .mkdir = unionfs_mkdir, ++ .rmdir = unionfs_rmdir, ++ .mknod = unionfs_mknod, ++ .rename = unionfs_rename, ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif /* CONFIG_UNION_FS_XATTR */ ++}; ++ ++struct inode_operations unionfs_main_iops = { ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif /* CONFIG_UNION_FS_XATTR */ ++}; +diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c +new file mode 100644 +index 0000000..0a9602a +--- /dev/null ++++ b/fs/unionfs/lookup.c +@@ -0,0 +1,570 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Lookup one path component @name relative to a <base,mnt> path pair. ++ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry ++ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and ++ * other lower mounts properly. If @new_mnt is non-null, will fill in the ++ * new mnt there. Caller is responsible to dput/mntput/path_put returned ++ * @dentry and @new_mnt. ++ */ ++struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt, ++ const char *name, struct vfsmount **new_mnt) ++{ ++ struct dentry *dentry = NULL; ++ struct nameidata lower_nd; ++ int err; ++ ++ /* we use flags=0 to get basic lookup */ ++ err = vfs_path_lookup(base, mnt, name, 0, &lower_nd); ++ ++ switch (err) { ++ case 0: /* no error */ ++ dentry = lower_nd.path.dentry; ++ if (new_mnt) ++ *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */ ++ break; ++ case -ENOENT: ++ /* ++ * We don't consider ENOENT an error, and we want to return ++ * a negative dentry (ala lookup_one_len). As we know ++ * there was no inode for this name before (-ENOENT), then ++ * it's safe to call lookup_one_len (which doesn't take a ++ * vfsmount). ++ */ ++ dentry = lookup_one_len(name, base, strlen(name)); ++ if (new_mnt) ++ *new_mnt = mntget(lower_nd.path.mnt); ++ break; ++ default: /* all other real errors */ ++ dentry = ERR_PTR(err); ++ break; ++ } ++ ++ return dentry; ++} ++ ++/* ++ * This is a utility function that fills in a unionfs dentry. ++ * Caller must lock this dentry with unionfs_lock_dentry. ++ * ++ * Returns: 0 (ok), or -ERRNO if an error occurred. ++ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly ++ */ ++int unionfs_partial_lookup(struct dentry *dentry) ++{ ++ struct dentry *tmp; ++ struct nameidata nd = { .flags = 0 }; ++ int err = -ENOSYS; ++ ++ tmp = unionfs_lookup_full(dentry, &nd, INTERPOSE_PARTIAL); ++ ++ if (!tmp) { ++ err = 0; ++ goto out; ++ } ++ if (IS_ERR(tmp)) { ++ err = PTR_ERR(tmp); ++ goto out; ++ } ++ /* XXX: need to change the interface */ ++ BUG_ON(tmp != dentry); ++out: ++ return err; ++} ++ ++/* The dentry cache is just so we have properly sized dentries. */ ++static struct kmem_cache *unionfs_dentry_cachep; ++int unionfs_init_dentry_cache(void) ++{ ++ unionfs_dentry_cachep = ++ kmem_cache_create("unionfs_dentry", ++ sizeof(struct unionfs_dentry_info), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return (unionfs_dentry_cachep ? 0 : -ENOMEM); ++} ++ ++void unionfs_destroy_dentry_cache(void) ++{ ++ if (unionfs_dentry_cachep) ++ kmem_cache_destroy(unionfs_dentry_cachep); ++} ++ ++void free_dentry_private_data(struct dentry *dentry) ++{ ++ if (!dentry || !dentry->d_fsdata) ++ return; ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++} ++ ++static inline int __realloc_dentry_private_data(struct dentry *dentry) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ void *p; ++ int size; ++ ++ BUG_ON(!info); ++ ++ size = sizeof(struct path) * sbmax(dentry->d_sb); ++ p = krealloc(info->lower_paths, size, GFP_ATOMIC); ++ if (unlikely(!p)) ++ return -ENOMEM; ++ ++ info->lower_paths = p; ++ ++ info->bstart = -1; ++ info->bend = -1; ++ info->bopaque = -1; ++ info->bcount = sbmax(dentry->d_sb); ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); ++ ++ memset(info->lower_paths, 0, size); ++ ++ return 0; ++} ++ ++/* UNIONFS_D(dentry)->lock must be locked */ ++int realloc_dentry_private_data(struct dentry *dentry) ++{ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* allocate new dentry private data */ ++int new_dentry_private_data(struct dentry *dentry, int subclass) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ ++ BUG_ON(info); ++ ++ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC); ++ if (unlikely(!info)) ++ return -ENOMEM; ++ ++ mutex_init(&info->lock); ++ mutex_lock_nested(&info->lock, subclass); ++ ++ info->lower_paths = NULL; ++ ++ dentry->d_fsdata = info; ++ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ mutex_unlock(&info->lock); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* ++ * scan through the lower dentry objects, and set bstart to reflect the ++ * starting branch ++ */ ++void update_bstart(struct dentry *dentry) ++{ ++ int bindex; ++ int bstart = dbstart(dentry); ++ int bend = dbend(dentry); ++ struct dentry *lower_dentry; ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (lower_dentry->d_inode) { ++ dbstart(dentry) = bindex; ++ break; ++ } ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } ++} ++ ++ ++/* ++ * Initialize a nameidata structure (the intent part) we can pass to a lower ++ * file system. Returns 0 on success or -error (only -ENOMEM possible). ++ * Inside that nd structure, this function may also return an allocated ++ * struct file (for open intents). The caller, when done with this nd, must ++ * kfree the intent file (using release_lower_nd). ++ * ++ * XXX: this code, and the callers of this code, should be redone using ++ * vfs_path_lookup() when (1) the nameidata structure is refactored into a ++ * separate intent-structure, and (2) open_namei() is broken into a VFS-only ++ * function and a method that other file systems can call. ++ */ ++int init_lower_nd(struct nameidata *nd, unsigned int flags) ++{ ++ int err = 0; ++#ifdef ALLOC_LOWER_ND_FILE ++ /* ++ * XXX: one day we may need to have the lower return an open file ++ * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may ++ * very well be needed for nfs4. ++ */ ++ struct file *file; ++#endif /* ALLOC_LOWER_ND_FILE */ ++ ++ memset(nd, 0, sizeof(struct nameidata)); ++ if (!flags) ++ return err; ++ ++ switch (flags) { ++ case LOOKUP_CREATE: ++ nd->intent.open.flags |= O_CREAT; ++ /* fall through: shared code for create/open cases */ ++ case LOOKUP_OPEN: ++ nd->flags = flags; ++ nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE); ++#ifdef ALLOC_LOWER_ND_FILE ++ file = kzalloc(sizeof(struct file), GFP_KERNEL); ++ if (unlikely(!file)) { ++ err = -ENOMEM; ++ break; /* exit switch statement and thus return */ ++ } ++ nd->intent.open.file = file; ++#endif /* ALLOC_LOWER_ND_FILE */ ++ break; ++ default: ++ /* ++ * We should never get here, for now. ++ * We can add new cases here later on. ++ */ ++ pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags); ++ BUG(); ++ break; ++ } ++ ++ return err; ++} ++ ++void release_lower_nd(struct nameidata *nd, int err) ++{ ++ if (!nd->intent.open.file) ++ return; ++ else if (!err) ++ release_open_intent(nd); ++#ifdef ALLOC_LOWER_ND_FILE ++ kfree(nd->intent.open.file); ++#endif /* ALLOC_LOWER_ND_FILE */ ++} ++ ++/* ++ * Main (and complex) driver function for Unionfs's lookup ++ * ++ * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error ++ * PTR if d_splice returned a different dentry. ++ * ++ * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's ++ * inode info must be locked. If lookupmode is INTERPOSE_LOOKUP (i.e., a ++ * newly looked-up dentry), then unionfs_lookup_backend will return a locked ++ * dentry's info, which the caller must unlock. ++ */ ++struct dentry *unionfs_lookup_full(struct dentry *dentry, ++ struct nameidata *nd_unused, int lookupmode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct vfsmount *lower_mnt; ++ struct vfsmount *lower_dir_mnt; ++ struct dentry *wh_lower_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *parent_dentry = NULL; ++ struct dentry *d_interposed = NULL; ++ int bindex, bstart, bend, bopaque; ++ int opaque, num_positive = 0; ++ const char *name; ++ int namelen; ++ int pos_start, pos_end; ++ ++ /* ++ * We should already have a lock on this dentry in the case of a ++ * partial lookup, or a revalidation. Otherwise it is returned from ++ * new_dentry_private_data already locked. ++ */ ++ verify_locked(dentry); ++ ++ /* must initialize dentry operations */ ++ dentry->d_op = &unionfs_dops; ++ ++ /* We never partial lookup the root directory. */ ++ if (IS_ROOT(dentry)) ++ goto out; ++ parent_dentry = dget_parent(dentry); ++ ++ name = dentry->d_name.name; ++ namelen = dentry->d_name.len; ++ ++ /* No dentries should get created for possible whiteout names. */ ++ if (!is_validname(name)) { ++ err = -EPERM; ++ goto out_free; ++ } ++ ++ /* Now start the actual lookup procedure. */ ++ bstart = dbstart(parent_dentry); ++ bend = dbend(parent_dentry); ++ bopaque = dbopaque(parent_dentry); ++ BUG_ON(bstart < 0); ++ ++ /* adjust bend to bopaque if needed */ ++ if ((bopaque >= 0) && (bopaque < bend)) ++ bend = bopaque; ++ ++ /* lookup all possible dentries */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ ++ /* skip if we already have a positive lower dentry */ ++ if (lower_dentry) { ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ if (bindex > dbend(dentry)) ++ dbend(dentry) = bindex; ++ if (lower_dentry->d_inode) ++ num_positive++; ++ continue; ++ } ++ ++ lower_dir_dentry = ++ unionfs_lower_dentry_idx(parent_dentry, bindex); ++ /* if the lower dentry's parent does not exist, skip this */ ++ if (!lower_dir_dentry || !lower_dir_dentry->d_inode) ++ continue; ++ ++ /* also skip it if the parent isn't a directory. */ ++ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode)) ++ continue; /* XXX: should be BUG_ON */ ++ ++ /* check for whiteouts: stop lookup if found */ ++ wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry); ++ if (IS_ERR(wh_lower_dentry)) { ++ err = PTR_ERR(wh_lower_dentry); ++ goto out_free; ++ } ++ if (wh_lower_dentry->d_inode) { ++ dbend(dentry) = dbopaque(dentry) = bindex; ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ dput(wh_lower_dentry); ++ break; ++ } ++ dput(wh_lower_dentry); ++ ++ /* Now do regular lookup; lookup @name */ ++ lower_dir_mnt = unionfs_lower_mnt_idx(parent_dentry, bindex); ++ lower_mnt = NULL; /* XXX: needed? */ ++ ++ lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt, ++ name, &lower_mnt); ++ ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out_free; ++ } ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ BUG_ON(!lower_mnt); ++ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt); ++ ++ /* adjust dbstart/end */ ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ if (bindex > dbend(dentry)) ++ dbend(dentry) = bindex; ++ /* ++ * We always store the lower dentries above, and update ++ * dbstart/dbend, even if the whole unionfs dentry is ++ * negative (i.e., no lower inodes). ++ */ ++ if (!lower_dentry->d_inode) ++ continue; ++ num_positive++; ++ ++ /* ++ * check if we just found an opaque directory, if so, stop ++ * lookups here. ++ */ ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ continue; ++ opaque = is_opaque_dir(dentry, bindex); ++ if (opaque < 0) { ++ err = opaque; ++ goto out_free; ++ } else if (opaque) { ++ dbend(dentry) = dbopaque(dentry) = bindex; ++ break; ++ } ++ dbend(dentry) = bindex; ++ ++ /* update parent directory's atime with the bindex */ ++ fsstack_copy_attr_atime(parent_dentry->d_inode, ++ lower_dir_dentry->d_inode); ++ } ++ ++ /* sanity checks, then decide if to process a negative dentry */ ++ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0); ++ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0); ++ ++ if (num_positive > 0) ++ goto out_positive; ++ ++ /*** handle NEGATIVE dentries ***/ ++ ++ /* ++ * If negative, keep only first lower negative dentry, to save on ++ * memory. ++ */ ++ if (dbstart(dentry) < dbend(dentry)) { ++ path_put_lowers(dentry, dbstart(dentry) + 1, ++ dbend(dentry), false); ++ dbend(dentry) = dbstart(dentry); ++ } ++ if (lookupmode == INTERPOSE_PARTIAL) ++ goto out; ++ if (lookupmode == INTERPOSE_LOOKUP) { ++ /* ++ * If all we found was a whiteout in the first available ++ * branch, then create a negative dentry for a possibly new ++ * file to be created. ++ */ ++ if (dbopaque(dentry) < 0) ++ goto out; ++ /* XXX: need to get mnt here */ ++ bindex = dbstart(dentry); ++ if (unionfs_lower_dentry_idx(dentry, bindex)) ++ goto out; ++ lower_dir_dentry = ++ unionfs_lower_dentry_idx(parent_dentry, bindex); ++ if (!lower_dir_dentry || !lower_dir_dentry->d_inode) ++ goto out; ++ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode)) ++ goto out; /* XXX: should be BUG_ON */ ++ /* XXX: do we need to cross bind mounts here? */ ++ lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ /* XXX: need to mntget/mntput as needed too! */ ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ /* XXX: wrong mnt for crossing bind mounts! */ ++ lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex); ++ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt); ++ ++ goto out; ++ } ++ ++ /* if we're revalidating a positive dentry, don't make it negative */ ++ if (lookupmode != INTERPOSE_REVAL) ++ d_add(dentry, NULL); ++ ++ goto out; ++ ++out_positive: ++ /*** handle POSITIVE dentries ***/ ++ ++ /* ++ * This unionfs dentry is positive (at least one lower inode ++ * exists), so scan entire dentry from beginning to end, and remove ++ * any negative lower dentries, if any. Then, update dbstart/dbend ++ * to reflect the start/end of positive dentries. ++ */ ++ pos_start = pos_end = -1; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, ++ bindex); ++ if (lower_dentry && lower_dentry->d_inode) { ++ if (pos_start < 0) ++ pos_start = bindex; ++ if (bindex > pos_end) ++ pos_end = bindex; ++ continue; ++ } ++ path_put_lowers(dentry, bindex, bindex, false); ++ } ++ if (pos_start >= 0) ++ dbstart(dentry) = pos_start; ++ if (pos_end >= 0) ++ dbend(dentry) = pos_end; ++ ++ /* Partial lookups need to re-interpose, or throw away older negs. */ ++ if (lookupmode == INTERPOSE_PARTIAL) { ++ if (dentry->d_inode) { ++ unionfs_reinterpose(dentry); ++ goto out; ++ } ++ ++ /* ++ * This dentry was positive, so it is as if we had a ++ * negative revalidation. ++ */ ++ lookupmode = INTERPOSE_REVAL_NEG; ++ update_bstart(dentry); ++ } ++ ++ /* ++ * Interpose can return a dentry if d_splice returned a different ++ * dentry. ++ */ ++ d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode); ++ if (IS_ERR(d_interposed)) ++ err = PTR_ERR(d_interposed); ++ else if (d_interposed) ++ dentry = d_interposed; ++ ++ if (!err) ++ goto out; ++ d_drop(dentry); ++ ++out_free: ++ /* should dput/mntput all the underlying dentries on error condition */ ++ if (dbstart(dentry) >= 0) ++ path_put_lowers_all(dentry, false); ++ /* free lower_paths unconditionally */ ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ ++out: ++ if (dentry && UNIONFS_D(dentry)) { ++ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0); ++ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0); ++ } ++ if (d_interposed && UNIONFS_D(d_interposed)) { ++ BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0); ++ BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0); ++ } ++ ++ dput(parent_dentry); ++ if (!err && d_interposed) ++ return d_interposed; ++ return ERR_PTR(err); ++} +diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c +new file mode 100644 +index 0000000..fea670b +--- /dev/null ++++ b/fs/unionfs/main.c +@@ -0,0 +1,777 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++ ++static void unionfs_fill_inode(struct dentry *dentry, ++ struct inode *inode) ++{ ++ struct inode *lower_inode; ++ struct dentry *lower_dentry; ++ int bindex, bstart, bend; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) { ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ continue; ++ } ++ ++ /* Initialize the lower inode to the new lower inode. */ ++ if (!lower_dentry->d_inode) ++ continue; ++ ++ unionfs_set_lower_inode_idx(inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ++ ibstart(inode) = dbstart(dentry); ++ ibend(inode) = dbend(dentry); ++ ++ /* Use attributes from the first branch. */ ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* Use different set of inode ops for symlinks & directories */ ++ if (S_ISLNK(lower_inode->i_mode)) ++ inode->i_op = &unionfs_symlink_iops; ++ else if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_op = &unionfs_dir_iops; ++ ++ /* Use different set of file ops for directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_fop = &unionfs_dir_fops; ++ ++ /* properly initialize special inodes */ ++ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || ++ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) ++ init_special_inode(inode, lower_inode->i_mode, ++ lower_inode->i_rdev); ++ ++ /* all well, copy inode attributes */ ++ unionfs_copy_attr_all(inode, lower_inode); ++ fsstack_copy_inode_size(inode, lower_inode); ++} ++ ++/* ++ * Connect a unionfs inode dentry/inode with several lower ones. This is ++ * the classic stackable file system "vnode interposition" action. ++ * ++ * @sb: unionfs's super_block ++ */ ++struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb, ++ int flag) ++{ ++ int err = 0; ++ struct inode *inode; ++ int need_fill_inode = 1; ++ struct dentry *spliced = NULL; ++ ++ verify_locked(dentry); ++ ++ /* ++ * We allocate our new inode below by calling unionfs_iget, ++ * which will initialize some of the new inode's fields ++ */ ++ ++ /* ++ * On revalidate we've already got our own inode and just need ++ * to fix it up. ++ */ ++ if (flag == INTERPOSE_REVAL) { ++ inode = dentry->d_inode; ++ UNIONFS_I(inode)->bstart = -1; ++ UNIONFS_I(inode)->bend = -1; ++ atomic_set(&UNIONFS_I(inode)->generation, ++ atomic_read(&UNIONFS_SB(sb)->generation)); ++ ++ UNIONFS_I(inode)->lower_inodes = ++ kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL); ++ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } else { ++ /* get unique inode number for unionfs */ ++ inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO)); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out; ++ } ++ if (atomic_read(&inode->i_count) > 1) ++ goto skip; ++ } ++ ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ ++skip: ++ /* only (our) lookup wants to do a d_add */ ++ switch (flag) { ++ case INTERPOSE_DEFAULT: ++ /* for operations which create new inodes */ ++ d_add(dentry, inode); ++ break; ++ case INTERPOSE_REVAL_NEG: ++ d_instantiate(dentry, inode); ++ break; ++ case INTERPOSE_LOOKUP: ++ spliced = d_splice_alias(inode, dentry); ++ if (spliced && spliced != dentry) { ++ /* ++ * d_splice can return a dentry if it was ++ * disconnected and had to be moved. We must ensure ++ * that the private data of the new dentry is ++ * correct and that the inode info was filled ++ * properly. Finally we must return this new ++ * dentry. ++ */ ++ spliced->d_op = &unionfs_dops; ++ spliced->d_fsdata = dentry->d_fsdata; ++ dentry->d_fsdata = NULL; ++ dentry = spliced; ++ if (need_fill_inode) { ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ } ++ goto out_spliced; ++ } else if (!spliced) { ++ if (need_fill_inode) { ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ goto out_spliced; ++ } ++ } ++ break; ++ case INTERPOSE_REVAL: ++ /* Do nothing. */ ++ break; ++ default: ++ printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n"); ++ BUG(); ++ } ++ goto out; ++ ++out_spliced: ++ if (!err) ++ return spliced; ++out: ++ return ERR_PTR(err); ++} ++ ++/* like interpose above, but for an already existing dentry */ ++void unionfs_reinterpose(struct dentry *dentry) ++{ ++ struct dentry *lower_dentry; ++ struct inode *inode; ++ int bindex, bstart, bend; ++ ++ verify_locked(dentry); ++ ++ /* This is pre-allocated inode */ ++ inode = dentry->d_inode; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ if (!lower_dentry->d_inode) ++ continue; ++ if (unionfs_lower_inode_idx(inode, bindex)) ++ continue; ++ unionfs_set_lower_inode_idx(inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ibstart(inode) = dbstart(dentry); ++ ibend(inode) = dbend(dentry); ++} ++ ++/* ++ * make sure the branch we just looked up (nd) makes sense: ++ * ++ * 1) we're not trying to stack unionfs on top of unionfs ++ * 2) it exists ++ * 3) is a directory ++ */ ++int check_branch(struct nameidata *nd) ++{ ++ /* XXX: remove in ODF code -- stacking unions allowed there */ ++ if (!strcmp(nd->path.dentry->d_sb->s_type->name, UNIONFS_NAME)) ++ return -EINVAL; ++ if (!nd->path.dentry->d_inode) ++ return -ENOENT; ++ if (!S_ISDIR(nd->path.dentry->d_inode->i_mode)) ++ return -ENOTDIR; ++ return 0; ++} ++ ++/* checks if two lower_dentries have overlapping branches */ ++static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2) ++{ ++ struct dentry *dent = NULL; ++ ++ dent = dent1; ++ while ((dent != dent2) && (dent->d_parent != dent)) ++ dent = dent->d_parent; ++ ++ if (dent == dent2) ++ return 1; ++ ++ dent = dent2; ++ while ((dent != dent1) && (dent->d_parent != dent)) ++ dent = dent->d_parent; ++ ++ return (dent == dent1); ++} ++ ++/* ++ * Parse "ro" or "rw" options, but default to "rw" if no mode options was ++ * specified. Fill the mode bits in @perms. If encounter an unknown ++ * string, return -EINVAL. Otherwise return 0. ++ */ ++int parse_branch_mode(const char *name, int *perms) ++{ ++ if (!name || !strcmp(name, "rw")) { ++ *perms = MAY_READ | MAY_WRITE; ++ return 0; ++ } ++ if (!strcmp(name, "ro")) { ++ *perms = MAY_READ; ++ return 0; ++ } ++ return -EINVAL; ++} ++ ++/* ++ * parse the dirs= mount argument ++ * ++ * We don't need to lock the superblock private data's rwsem, as we get ++ * called only by unionfs_read_super - it is still a long time before anyone ++ * can even get a reference to us. ++ */ ++static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info ++ *lower_root_info, char *options) ++{ ++ struct nameidata nd; ++ char *name; ++ int err = 0; ++ int branches = 1; ++ int bindex = 0; ++ int i = 0; ++ int j = 0; ++ struct dentry *dent1; ++ struct dentry *dent2; ++ ++ if (options[0] == '\0') { ++ printk(KERN_ERR "unionfs: no branches specified\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Each colon means we have a separator, this is really just a rough ++ * guess, since strsep will handle empty fields for us. ++ */ ++ for (i = 0; options[i]; i++) ++ if (options[i] == ':') ++ branches++; ++ ++ /* allocate space for underlying pointers to lower dentry */ ++ UNIONFS_SB(sb)->data = ++ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL); ++ if (unlikely(!UNIONFS_SB(sb)->data)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ lower_root_info->lower_paths = ++ kcalloc(branches, sizeof(struct path), GFP_KERNEL); ++ if (unlikely(!lower_root_info->lower_paths)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */ ++ branches = 0; ++ while ((name = strsep(&options, ":")) != NULL) { ++ int perms; ++ char *mode = strchr(name, '='); ++ ++ if (!name) ++ continue; ++ if (!*name) { /* bad use of ':' (extra colons) */ ++ err = -EINVAL; ++ goto out; ++ } ++ ++ branches++; ++ ++ /* strip off '=' if any */ ++ if (mode) ++ *mode++ = '\0'; ++ ++ err = parse_branch_mode(mode, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for " ++ "branch %d\n", mode, bindex); ++ goto out; ++ } ++ /* ensure that leftmost branch is writeable */ ++ if (!bindex && !(perms & MAY_WRITE)) { ++ printk(KERN_ERR "unionfs: leftmost branch cannot be " ++ "read-only (use \"-o ro\" to create a " ++ "read-only union)\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = path_lookup(name, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory '%s' (error %d)\n", ++ name, err); ++ goto out; ++ } ++ ++ err = check_branch(&nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower directory " ++ "'%s' is not a valid branch\n", name); ++ path_put(&nd.path); ++ goto out; ++ } ++ ++ lower_root_info->lower_paths[bindex].dentry = nd.path.dentry; ++ lower_root_info->lower_paths[bindex].mnt = nd.path.mnt; ++ ++ set_branchperms(sb, bindex, perms); ++ set_branch_count(sb, bindex, 0); ++ new_branch_id(sb, bindex); ++ ++ if (lower_root_info->bstart < 0) ++ lower_root_info->bstart = bindex; ++ lower_root_info->bend = bindex; ++ bindex++; ++ } ++ ++ if (branches == 0) { ++ printk(KERN_ERR "unionfs: no branches specified\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ BUG_ON(branches != (lower_root_info->bend + 1)); ++ ++ /* ++ * Ensure that no overlaps exist in the branches. ++ * ++ * This test is required because the Linux kernel has no support ++ * currently for ensuring coherency between stackable layers and ++ * branches. If we were to allow overlapping branches, it would be ++ * possible, for example, to delete a file via one branch, which ++ * would not be reflected in another branch. Such incoherency could ++ * lead to inconsistencies and even kernel oopses. Rather than ++ * implement hacks to work around some of these cache-coherency ++ * problems, we prevent branch overlapping, for now. A complete ++ * solution will involve proper kernel/VFS support for cache ++ * coherency, at which time we could safely remove this ++ * branch-overlapping test. ++ */ ++ for (i = 0; i < branches; i++) { ++ dent1 = lower_root_info->lower_paths[i].dentry; ++ for (j = i + 1; j < branches; j++) { ++ dent2 = lower_root_info->lower_paths[j].dentry; ++ if (is_branch_overlap(dent1, dent2)) { ++ printk(KERN_ERR "unionfs: branches %d and " ++ "%d overlap\n", i, j); ++ err = -EINVAL; ++ goto out; ++ } ++ } ++ } ++ ++out: ++ if (err) { ++ for (i = 0; i < branches; i++) ++ if (lower_root_info->lower_paths[i].dentry) { ++ dput(lower_root_info->lower_paths[i].dentry); ++ /* initialize: can't use unionfs_mntput here */ ++ mntput(lower_root_info->lower_paths[i].mnt); ++ } ++ ++ kfree(lower_root_info->lower_paths); ++ kfree(UNIONFS_SB(sb)->data); ++ ++ /* ++ * MUST clear the pointers to prevent potential double free if ++ * the caller dies later on ++ */ ++ lower_root_info->lower_paths = NULL; ++ UNIONFS_SB(sb)->data = NULL; ++ } ++ return err; ++} ++ ++/* ++ * Parse mount options. See the manual page for usage instructions. ++ * ++ * Returns the dentry object of the lower-level (lower) directory; ++ * We want to mount our stackable file system on top of that lower directory. ++ */ ++static struct unionfs_dentry_info *unionfs_parse_options( ++ struct super_block *sb, ++ char *options) ++{ ++ struct unionfs_dentry_info *lower_root_info; ++ char *optname; ++ int err = 0; ++ int bindex; ++ int dirsfound = 0; ++ ++ /* allocate private data area */ ++ err = -ENOMEM; ++ lower_root_info = ++ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL); ++ if (unlikely(!lower_root_info)) ++ goto out_error; ++ lower_root_info->bstart = -1; ++ lower_root_info->bend = -1; ++ lower_root_info->bopaque = -1; ++ ++ while ((optname = strsep(&options, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ ++ /* ++ * All of our options take an argument now. Insert ones that ++ * don't, above this check. ++ */ ++ if (!optarg) { ++ printk(KERN_ERR "unionfs: %s requires an argument\n", ++ optname); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ if (!strcmp("dirs", optname)) { ++ if (++dirsfound > 1) { ++ printk(KERN_ERR ++ "unionfs: multiple dirs specified\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ err = parse_dirs_option(sb, lower_root_info, optarg); ++ if (err) ++ goto out_error; ++ continue; ++ } ++ ++ err = -EINVAL; ++ printk(KERN_ERR ++ "unionfs: unrecognized option '%s'\n", optname); ++ goto out_error; ++ } ++ if (dirsfound != 1) { ++ printk(KERN_ERR "unionfs: dirs option required\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ goto out; ++ ++out_error: ++ if (lower_root_info && lower_root_info->lower_paths) { ++ for (bindex = lower_root_info->bstart; ++ bindex >= 0 && bindex <= lower_root_info->bend; ++ bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = lower_root_info->lower_paths[bindex].dentry; ++ m = lower_root_info->lower_paths[bindex].mnt; ++ ++ dput(d); ++ /* initializing: can't use unionfs_mntput here */ ++ mntput(m); ++ } ++ } ++ ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ ++ kfree(UNIONFS_SB(sb)->data); ++ UNIONFS_SB(sb)->data = NULL; ++ ++ lower_root_info = ERR_PTR(err); ++out: ++ return lower_root_info; ++} ++ ++/* ++ * our custom d_alloc_root work-alike ++ * ++ * we can't use d_alloc_root if we want to use our own interpose function ++ * unchanged, so we simply call our own "fake" d_alloc_root ++ */ ++static struct dentry *unionfs_d_alloc_root(struct super_block *sb) ++{ ++ struct dentry *ret = NULL; ++ ++ if (sb) { ++ static const struct qstr name = { ++ .name = "/", ++ .len = 1 ++ }; ++ ++ ret = d_alloc(NULL, &name); ++ if (likely(ret)) { ++ ret->d_op = &unionfs_dops; ++ ret->d_sb = sb; ++ ret->d_parent = ret; ++ } ++ } ++ return ret; ++} ++ ++/* ++ * There is no need to lock the unionfs_super_info's rwsem as there is no ++ * way anyone can have a reference to the superblock at this point in time. ++ */ ++static int unionfs_read_super(struct super_block *sb, void *raw_data, ++ int silent) ++{ ++ int err = 0; ++ struct unionfs_dentry_info *lower_root_info = NULL; ++ int bindex, bstart, bend; ++ ++ if (!raw_data) { ++ printk(KERN_ERR ++ "unionfs: read_super: missing data argument\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* Allocate superblock private data */ ++ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL); ++ if (unlikely(!UNIONFS_SB(sb))) { ++ printk(KERN_CRIT "unionfs: read_super: out of memory\n"); ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ UNIONFS_SB(sb)->bend = -1; ++ atomic_set(&UNIONFS_SB(sb)->generation, 1); ++ init_rwsem(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */ ++ ++ lower_root_info = unionfs_parse_options(sb, raw_data); ++ if (IS_ERR(lower_root_info)) { ++ printk(KERN_ERR ++ "unionfs: read_super: error while parsing options " ++ "(err = %ld)\n", PTR_ERR(lower_root_info)); ++ err = PTR_ERR(lower_root_info); ++ lower_root_info = NULL; ++ goto out_free; ++ } ++ if (lower_root_info->bstart == -1) { ++ err = -ENOENT; ++ goto out_free; ++ } ++ ++ /* set the lower superblock field of upper superblock */ ++ bstart = lower_root_info->bstart; ++ BUG_ON(bstart != 0); ++ sbend(sb) = bend = lower_root_info->bend; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d = lower_root_info->lower_paths[bindex].dentry; ++ atomic_inc(&d->d_sb->s_active); ++ unionfs_set_lower_super_idx(sb, bindex, d->d_sb); ++ } ++ ++ /* max Bytes is the maximum bytes from highest priority branch */ ++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; ++ ++ /* ++ * Our c/m/atime granularity is 1 ns because we may stack on file ++ * systems whose granularity is as good. This is important for our ++ * time-based cache coherency. ++ */ ++ sb->s_time_gran = 1; ++ ++ sb->s_op = &unionfs_sops; ++ ++ /* See comment next to the definition of unionfs_d_alloc_root */ ++ sb->s_root = unionfs_d_alloc_root(sb); ++ if (unlikely(!sb->s_root)) { ++ err = -ENOMEM; ++ goto out_dput; ++ } ++ ++ /* link the upper and lower dentries */ ++ sb->s_root->d_fsdata = NULL; ++ err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT); ++ if (unlikely(err)) ++ goto out_freedpd; ++ ++ /* Set the lower dentries for s_root */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = lower_root_info->lower_paths[bindex].dentry; ++ m = lower_root_info->lower_paths[bindex].mnt; ++ ++ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d); ++ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m); ++ } ++ dbstart(sb->s_root) = bstart; ++ dbend(sb->s_root) = bend; ++ ++ /* Set the generation number to one, since this is for the mount. */ ++ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1); ++ ++ /* ++ * Call interpose to create the upper level inode. Only ++ * INTERPOSE_LOOKUP can return a value other than 0 on err. ++ */ ++ err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0)); ++ unionfs_unlock_dentry(sb->s_root); ++ if (!err) ++ goto out; ++ /* else fall through */ ++ ++out_freedpd: ++ if (UNIONFS_D(sb->s_root)) { ++ kfree(UNIONFS_D(sb->s_root)->lower_paths); ++ free_dentry_private_data(sb->s_root); ++ } ++ dput(sb->s_root); ++ ++out_dput: ++ if (lower_root_info && !IS_ERR(lower_root_info)) { ++ for (bindex = lower_root_info->bstart; ++ bindex <= lower_root_info->bend; bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = lower_root_info->lower_paths[bindex].dentry; ++ m = lower_root_info->lower_paths[bindex].mnt; ++ ++ dput(d); ++ /* initializing: can't use unionfs_mntput here */ ++ mntput(m); ++ /* drop refs we took earlier */ ++ atomic_dec(&d->d_sb->s_active); ++ } ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ lower_root_info = NULL; ++ } ++ ++out_free: ++ kfree(UNIONFS_SB(sb)->data); ++ kfree(UNIONFS_SB(sb)); ++ sb->s_fs_info = NULL; ++ ++out: ++ if (lower_root_info && !IS_ERR(lower_root_info)) { ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ } ++ return err; ++} ++ ++static int unionfs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, ++ void *raw_data, struct vfsmount *mnt) ++{ ++ int err; ++ err = get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt); ++ if (!err) ++ UNIONFS_SB(mnt->mnt_sb)->dev_name = ++ kstrdup(dev_name, GFP_KERNEL); ++ return err; ++} ++ ++static struct file_system_type unionfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = UNIONFS_NAME, ++ .get_sb = unionfs_get_sb, ++ .kill_sb = generic_shutdown_super, ++ .fs_flags = FS_REVAL_DOT, ++}; ++ ++static int __init init_unionfs_fs(void) ++{ ++ int err; ++ ++ pr_info("Registering unionfs " UNIONFS_VERSION "\n"); ++ ++ err = unionfs_init_filldir_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = unionfs_init_inode_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = unionfs_init_dentry_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = init_sioq(); ++ if (unlikely(err)) ++ goto out; ++ err = register_filesystem(&unionfs_fs_type); ++out: ++ if (unlikely(err)) { ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ } ++ return err; ++} ++ ++static void __exit exit_unionfs_fs(void) ++{ ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ unregister_filesystem(&unionfs_fs_type); ++ pr_info("Completed unionfs module unload\n"); ++} ++ ++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" ++ " (http://www.fsl.cs.sunysb.edu)"); ++MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION ++ " (http://unionfs.filesystems.org)"); ++MODULE_LICENSE("GPL"); ++ ++module_init(init_unionfs_fs); ++module_exit(exit_unionfs_fs); +diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c +new file mode 100644 +index 0000000..b7d4713 +--- /dev/null ++++ b/fs/unionfs/mmap.c +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2006 Shaya Potter ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++ ++/* ++ * XXX: we need a dummy readpage handler because generic_file_mmap (which we ++ * use in unionfs_mmap) checks for the existence of ++ * mapping->a_ops->readpage, else it returns -ENOEXEC. The VFS will need to ++ * be fixed to allow a file system to define vm_ops->fault without any ++ * address_space_ops whatsoever. ++ * ++ * Otherwise, we don't want to use our readpage method at all. ++ */ ++static int unionfs_readpage(struct file *file, struct page *page) ++{ ++ BUG(); ++ return -EINVAL; ++} ++ ++static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ int err; ++ struct file *file, *lower_file; ++ struct vm_operations_struct *lower_vm_ops; ++ struct vm_area_struct lower_vma; ++ ++ BUG_ON(!vma); ++ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); ++ file = lower_vma.vm_file; ++ lower_vm_ops = UNIONFS_F(file)->lower_vm_ops; ++ BUG_ON(!lower_vm_ops); ++ ++ lower_file = unionfs_lower_file(file); ++ BUG_ON(!lower_file); ++ /* ++ * XXX: vm_ops->fault may be called in parallel. Because we have to ++ * resort to temporarily changing the vma->vm_file to point to the ++ * lower file, a concurrent invocation of unionfs_fault could see a ++ * different value. In this workaround, we keep a different copy of ++ * the vma structure in our stack, so we never expose a different ++ * value of the vma->vm_file called to us, even temporarily. A ++ * better fix would be to change the calling semantics of ->fault to ++ * take an explicit file pointer. ++ */ ++ lower_vma.vm_file = lower_file; ++ err = lower_vm_ops->fault(&lower_vma, vmf); ++ return err; ++} ++ ++/* ++ * XXX: the default address_space_ops for unionfs is empty. We cannot set ++ * our inode->i_mapping->a_ops to NULL because too many code paths expect ++ * the a_ops vector to be non-NULL. ++ */ ++struct address_space_operations unionfs_aops = { ++ /* empty on purpose */ ++}; ++ ++/* ++ * XXX: we need a second, dummy address_space_ops vector, to be used ++ * temporarily during unionfs_mmap, because the latter calls ++ * generic_file_mmap, which checks if ->readpage exists, else returns ++ * -ENOEXEC. ++ */ ++struct address_space_operations unionfs_dummy_aops = { ++ .readpage = unionfs_readpage, ++}; ++ ++struct vm_operations_struct unionfs_vm_ops = { ++ .fault = unionfs_fault, ++}; +diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c +new file mode 100644 +index 0000000..06d5374 +--- /dev/null ++++ b/fs/unionfs/rdstate.c +@@ -0,0 +1,285 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This file contains the routines for maintaining readdir state. */ ++ ++/* ++ * There are two structures here, rdstate which is a hash table ++ * of the second structure which is a filldir_node. ++ */ ++ ++/* ++ * This is a struct kmem_cache for filldir nodes, because we allocate a lot ++ * of them and they shouldn't waste memory. If the node has a small name ++ * (as defined by the dentry structure), then we use an inline name to ++ * preserve kmalloc space. ++ */ ++static struct kmem_cache *unionfs_filldir_cachep; ++ ++int unionfs_init_filldir_cache(void) ++{ ++ unionfs_filldir_cachep = ++ kmem_cache_create("unionfs_filldir", ++ sizeof(struct filldir_node), 0, ++ SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return (unionfs_filldir_cachep ? 0 : -ENOMEM); ++} ++ ++void unionfs_destroy_filldir_cache(void) ++{ ++ if (unionfs_filldir_cachep) ++ kmem_cache_destroy(unionfs_filldir_cachep); ++} ++ ++/* ++ * This is a tuning parameter that tells us roughly how big to make the ++ * hash table in directory entries per page. This isn't perfect, but ++ * at least we get a hash table size that shouldn't be too overloaded. ++ * The following averages are based on my home directory. ++ * 14.44693 Overall ++ * 12.29 Single Page Directories ++ * 117.93 Multi-page directories ++ */ ++#define DENTPAGE 4096 ++#define DENTPERONEPAGE 12 ++#define DENTPERPAGE 118 ++#define MINHASHSIZE 1 ++static int guesstimate_hash_size(struct inode *inode) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ int hashsize = MINHASHSIZE; ++ ++ if (UNIONFS_I(inode)->hashsize > 0) ++ return UNIONFS_I(inode)->hashsize; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ if (i_size_read(lower_inode) == DENTPAGE) ++ hashsize += DENTPERONEPAGE; ++ else ++ hashsize += (i_size_read(lower_inode) / DENTPAGE) * ++ DENTPERPAGE; ++ } ++ ++ return hashsize; ++} ++ ++int init_rdstate(struct file *file) ++{ ++ BUG_ON(sizeof(loff_t) != ++ (sizeof(unsigned int) + sizeof(unsigned int))); ++ BUG_ON(UNIONFS_F(file)->rdstate != NULL); ++ ++ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode, ++ fbstart(file)); ++ ++ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM); ++} ++ ++struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos) ++{ ++ struct unionfs_dir_state *rdstate = NULL; ++ struct list_head *pos; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each(pos, &UNIONFS_I(inode)->readdircache) { ++ struct unionfs_dir_state *r = ++ list_entry(pos, struct unionfs_dir_state, cache); ++ if (fpos == rdstate2offset(r)) { ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&r->cache); ++ rdstate = r; ++ break; ++ } ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ return rdstate; ++} ++ ++struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex) ++{ ++ int i = 0; ++ int hashsize; ++ unsigned long mallocsize = sizeof(struct unionfs_dir_state); ++ struct unionfs_dir_state *rdstate; ++ ++ hashsize = guesstimate_hash_size(inode); ++ mallocsize += hashsize * sizeof(struct list_head); ++ mallocsize = __roundup_pow_of_two(mallocsize); ++ ++ /* This should give us about 500 entries anyway. */ ++ if (mallocsize > PAGE_SIZE) ++ mallocsize = PAGE_SIZE; ++ ++ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) / ++ sizeof(struct list_head); ++ ++ rdstate = kmalloc(mallocsize, GFP_KERNEL); ++ if (unlikely(!rdstate)) ++ return NULL; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1)) ++ UNIONFS_I(inode)->cookie = 1; ++ else ++ UNIONFS_I(inode)->cookie++; ++ ++ rdstate->cookie = UNIONFS_I(inode)->cookie; ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ rdstate->offset = 1; ++ rdstate->access = jiffies; ++ rdstate->bindex = bindex; ++ rdstate->dirpos = 0; ++ rdstate->hashentries = 0; ++ rdstate->size = hashsize; ++ for (i = 0; i < rdstate->size; i++) ++ INIT_LIST_HEAD(&rdstate->list[i]); ++ ++ return rdstate; ++} ++ ++static void free_filldir_node(struct filldir_node *node) ++{ ++ if (node->namelen >= DNAME_INLINE_LEN_MIN) ++ kfree(node->name); ++ kmem_cache_free(unionfs_filldir_cachep, node); ++} ++ ++void free_rdstate(struct unionfs_dir_state *state) ++{ ++ struct filldir_node *tmp; ++ int i; ++ ++ for (i = 0; i < state->size; i++) { ++ struct list_head *head = &(state->list[i]); ++ struct list_head *pos, *n; ++ ++ /* traverse the list and deallocate space */ ++ list_for_each_safe(pos, n, head) { ++ tmp = list_entry(pos, struct filldir_node, file_list); ++ list_del(&tmp->file_list); ++ free_filldir_node(tmp); ++ } ++ } ++ ++ kfree(state); ++} ++ ++struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, ++ int is_whiteout) ++{ ++ int index; ++ unsigned int hash; ++ struct list_head *head; ++ struct list_head *pos; ++ struct filldir_node *cursor = NULL; ++ int found = 0; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ ++ head = &(rdstate->list[index]); ++ list_for_each(pos, head) { ++ cursor = list_entry(pos, struct filldir_node, file_list); ++ ++ if (cursor->namelen == namelen && cursor->hash == hash && ++ !strncmp(cursor->name, name, namelen)) { ++ /* ++ * a duplicate exists, and hence no need to create ++ * entry to the list ++ */ ++ found = 1; ++ ++ /* ++ * if a duplicate is found in this branch, and is ++ * not due to the caller looking for an entry to ++ * whiteout, then the file system may be corrupted. ++ */ ++ if (unlikely(!is_whiteout && ++ cursor->bindex == rdstate->bindex)) ++ printk(KERN_ERR "unionfs: filldir: possible " ++ "I/O error: a file is duplicated " ++ "in the same branch %d: %s\n", ++ rdstate->bindex, cursor->name); ++ break; ++ } ++ } ++ ++ if (!found) ++ cursor = NULL; ++ ++ return cursor; ++} ++ ++int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, ++ int namelen, int bindex, int whiteout) ++{ ++ struct filldir_node *new; ++ unsigned int hash; ++ int index; ++ int err = 0; ++ struct list_head *head; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ head = &(rdstate->list[index]); ++ ++ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL); ++ if (unlikely(!new)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&new->file_list); ++ new->namelen = namelen; ++ new->hash = hash; ++ new->bindex = bindex; ++ new->whiteout = whiteout; ++ ++ if (namelen < DNAME_INLINE_LEN_MIN) { ++ new->name = new->iname; ++ } else { ++ new->name = kmalloc(namelen + 1, GFP_KERNEL); ++ if (unlikely(!new->name)) { ++ kmem_cache_free(unionfs_filldir_cachep, new); ++ new = NULL; ++ goto out; ++ } ++ } ++ ++ memcpy(new->name, name, namelen); ++ new->name[namelen] = '\0'; ++ ++ rdstate->hashentries++; ++ ++ list_add(&(new->file_list), head); ++out: ++ return err; ++} +diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c +new file mode 100644 +index 0000000..da7d589 +--- /dev/null ++++ b/fs/unionfs/rename.c +@@ -0,0 +1,478 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * This is a helper function for rename, used when rename ends up with hosed ++ * over dentries and we need to revert. ++ */ ++static int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent; ++ int err = 0; ++ ++ verify_locked(dentry); ++ ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_CHILD); ++ lower_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex); ++ unionfs_unlock_dentry(dentry->d_parent); ++ ++ BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode)); ++ ++ lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent, ++ dentry->d_name.len); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL); ++ ++ if (!lower_dentry->d_inode) { ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } else { ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ++out: ++ return err; ++} ++ ++static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry; ++ struct dentry *lower_new_dentry; ++ struct dentry *lower_old_dir_dentry; ++ struct dentry *lower_new_dir_dentry; ++ struct dentry *trap; ++ ++ lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); ++ lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex); ++ ++ if (!lower_new_dentry) { ++ lower_new_dentry = ++ create_parents(new_dentry->d_parent->d_inode, ++ new_dentry, new_dentry->d_name.name, ++ bindex); ++ if (IS_ERR(lower_new_dentry)) { ++ err = PTR_ERR(lower_new_dentry); ++ if (IS_COPYUP_ERR(err)) ++ goto out; ++ printk(KERN_ERR "unionfs: error creating directory " ++ "tree for rename, bindex=%d err=%d\n", ++ bindex, err); ++ goto out; ++ } ++ } ++ ++ /* check for and remove whiteout, if any */ ++ err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex); ++ if (err > 0) /* ignore if whiteout found and successfully removed */ ++ err = 0; ++ if (err) ++ goto out; ++ ++ /* check of old_dentry branch is writable */ ++ err = is_robranch_super(old_dentry->d_sb, bindex); ++ if (err) ++ goto out; ++ ++ dget(lower_old_dentry); ++ dget(lower_new_dentry); ++ lower_old_dir_dentry = dget_parent(lower_old_dentry); ++ lower_new_dir_dentry = dget_parent(lower_new_dentry); ++ ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ /* source should not be ancenstor of target */ ++ if (trap == lower_old_dentry) { ++ err = -EINVAL; ++ goto out_err_unlock; ++ } ++ /* target should not be ancenstor of source */ ++ if (trap == lower_new_dentry) { ++ err = -ENOTEMPTY; ++ goto out_err_unlock; ++ } ++ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, ++ lower_new_dir_dentry->d_inode, lower_new_dentry); ++out_err_unlock: ++ if (!err) { ++ /* update parent dir times */ ++ fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode); ++ fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode); ++ } ++ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ lockdep_on(); ++ ++ dput(lower_old_dir_dentry); ++ dput(lower_new_dir_dentry); ++ dput(lower_old_dentry); ++ dput(lower_new_dentry); ++ ++out: ++ if (!err) { ++ /* Fixup the new_dentry. */ ++ if (bindex < dbstart(new_dentry)) ++ dbstart(new_dentry) = bindex; ++ else if (bindex > dbend(new_dentry)) ++ dbend(new_dentry) = bindex; ++ } ++ ++ return err; ++} ++ ++/* ++ * Main rename code. This is sufficiently complex, that it's documented in ++ * Documentation/filesystems/unionfs/rename.txt. This routine calls ++ * __unionfs_rename() above to perform some of the work. ++ */ ++static int do_unionfs_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct inode *new_dir, ++ struct dentry *new_dentry) ++{ ++ int err = 0; ++ int bindex, bwh_old; ++ int old_bstart, old_bend; ++ int new_bstart, new_bend; ++ int do_copyup = -1; ++ struct dentry *parent_dentry; ++ int local_err = 0; ++ int eio = 0; ++ int revert = 0; ++ ++ old_bstart = dbstart(old_dentry); ++ bwh_old = old_bstart; ++ old_bend = dbend(old_dentry); ++ parent_dentry = old_dentry->d_parent; ++ ++ new_bstart = dbstart(new_dentry); ++ new_bend = dbend(new_dentry); ++ ++ /* Rename source to destination. */ ++ err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry, ++ old_bstart); ++ if (err) { ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ do_copyup = old_bstart - 1; ++ } else { ++ revert = 1; ++ } ++ ++ /* ++ * Unlink all instances of destination that exist to the left of ++ * bstart of source. On error, revert back, goto out. ++ */ ++ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) { ++ struct dentry *unlink_dentry; ++ struct dentry *unlink_dir_dentry; ++ ++ BUG_ON(bindex < 0); ++ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); ++ if (!unlink_dentry) ++ continue; ++ ++ unlink_dir_dentry = lock_parent(unlink_dentry); ++ err = is_robranch_super(old_dir->i_sb, bindex); ++ if (!err) ++ err = vfs_unlink(unlink_dir_dentry->d_inode, ++ unlink_dentry); ++ ++ fsstack_copy_attr_times(new_dentry->d_parent->d_inode, ++ unlink_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ new_dentry->d_parent->d_inode->i_nlink = ++ unionfs_get_nlinks(new_dentry->d_parent->d_inode); ++ ++ unlock_dir(unlink_dir_dentry); ++ if (!err) { ++ if (bindex != new_bstart) { ++ dput(unlink_dentry); ++ unionfs_set_lower_dentry_idx(new_dentry, ++ bindex, NULL); ++ } ++ } else if (IS_COPYUP_ERR(err)) { ++ do_copyup = bindex - 1; ++ } else if (revert) { ++ goto revert; ++ } ++ } ++ ++ if (do_copyup != -1) { ++ for (bindex = do_copyup; bindex >= 0; bindex--) { ++ /* ++ * copyup the file into some left directory, so that ++ * you can rename it ++ */ ++ err = copyup_dentry(old_dentry->d_parent->d_inode, ++ old_dentry, old_bstart, bindex, ++ old_dentry->d_name.name, ++ old_dentry->d_name.len, NULL, ++ i_size_read(old_dentry->d_inode)); ++ /* if copyup failed, try next branch to the left */ ++ if (err) ++ continue; ++ bwh_old = bindex; ++ err = __unionfs_rename(old_dir, old_dentry, ++ new_dir, new_dentry, ++ bindex); ++ break; ++ } ++ } ++ ++ /* make it opaque */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) { ++ err = make_dir_opaque(old_dentry, dbstart(old_dentry)); ++ if (err) ++ goto revert; ++ } ++ ++ /* ++ * Create whiteout for source, only if: ++ * (1) There is more than one underlying instance of source. ++ * (2) We did a copy_up ++ */ ++ if ((old_bstart != old_bend) || (do_copyup != -1)) { ++ if (bwh_old < 0) { ++ printk(KERN_ERR "unionfs: rename error (bwh_old=%d)\n", ++ bwh_old); ++ err = -EIO; ++ goto out; ++ } ++ err = create_whiteout(old_dentry, bwh_old); ++ if (err) { ++ /* can't fix anything now, so we exit with -EIO */ ++ printk(KERN_ERR "unionfs: can't create a whiteout for " ++ "%s in rename!\n", old_dentry->d_name.name); ++ err = -EIO; ++ } ++ } ++ ++out: ++ return err; ++ ++revert: ++ /* Do revert here. */ ++ local_err = unionfs_refresh_lower_dentry(new_dentry, old_bstart); ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the new refresh failed\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_lower_dentry(old_dentry, old_bstart); ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the old refresh failed\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (!unionfs_lower_dentry_idx(new_dentry, bindex) || ++ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the object disappeared from under us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (unionfs_lower_dentry_idx(old_dentry, bindex) && ++ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the object was created underneath us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ local_err = __unionfs_rename(new_dir, new_dentry, ++ old_dir, old_dentry, old_bstart); ++ ++ /* If we can't fix it, then we cop-out with -EIO. */ ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename!\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_lower_dentry(new_dentry, bindex); ++ if (local_err) ++ eio = -EIO; ++ local_err = unionfs_refresh_lower_dentry(old_dentry, bindex); ++ if (local_err) ++ eio = -EIO; ++ ++revert_out: ++ if (eio) ++ err = eio; ++ return err; ++} ++ ++/* ++ * We can't copyup a directory, because it may involve huge numbers of ++ * children, etc. Doing that in the kernel would be bad, so instead we ++ * return EXDEV to the user-space utility that caused this, and let the ++ * user-space recurse and ask us to copy up each file separately. ++ */ ++static int may_rename_dir(struct dentry *dentry) ++{ ++ int err, bstart; ++ ++ err = check_empty(dentry, NULL); ++ if (err == -ENOTEMPTY) { ++ if (is_robranch(dentry)) ++ return -EXDEV; ++ } else if (err) { ++ return err; ++ } ++ ++ bstart = dbstart(dentry); ++ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart) ++ return 0; ++ ++ dbstart(dentry) = bstart + 1; ++ err = check_empty(dentry, NULL); ++ dbstart(dentry) = bstart; ++ if (err == -ENOTEMPTY) ++ err = -EXDEV; ++ return err; ++} ++ ++int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *wh_dentry; ++ ++ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_double_lock_dentry(old_dentry, new_dentry); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (unlikely(!d_deleted(new_dentry) && new_dentry->d_inode && ++ !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) ++ err = unionfs_partial_lookup(old_dentry); ++ else ++ err = may_rename_dir(old_dentry); ++ ++ if (err) ++ goto out; ++ ++ err = unionfs_partial_lookup(new_dentry); ++ if (err) ++ goto out; ++ ++ /* ++ * if new_dentry is already lower because of whiteout, ++ * simply override it even if the whited-out dir is not empty. ++ */ ++ wh_dentry = find_first_whiteout(new_dentry); ++ if (!IS_ERR(wh_dentry)) { ++ dput(wh_dentry); ++ } else if (new_dentry->d_inode) { ++ if (S_ISDIR(old_dentry->d_inode->i_mode) != ++ S_ISDIR(new_dentry->d_inode->i_mode)) { ++ err = S_ISDIR(old_dentry->d_inode->i_mode) ? ++ -ENOTDIR : -EISDIR; ++ goto out; ++ } ++ ++ if (S_ISDIR(new_dentry->d_inode->i_mode)) { ++ struct unionfs_dir_state *namelist = NULL; ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(new_dentry, &namelist); ++ if (err) ++ goto out; ++ ++ if (!is_robranch(new_dentry)) ++ err = delete_whiteouts(new_dentry, ++ dbstart(new_dentry), ++ namelist); ++ ++ free_rdstate(namelist); ++ ++ if (err) ++ goto out; ++ } ++ } ++ ++ err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry); ++ if (err) ++ goto out; ++ ++ /* ++ * force re-lookup since the dir on ro branch is not renamed, and ++ * lower dentries still indicate the un-renamed ones. ++ */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) ++ atomic_dec(&UNIONFS_D(old_dentry)->generation); ++ else ++ unionfs_postcopyup_release(old_dentry); ++ if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) { ++ unionfs_postcopyup_release(new_dentry); ++ unionfs_postcopyup_setmnt(new_dentry); ++ if (!unionfs_lower_inode(new_dentry->d_inode)) { ++ /* ++ * If we get here, it means that no copyup was ++ * needed, and that a file by the old name already ++ * existing on the destination branch; that file got ++ * renamed earlier in this function, so all we need ++ * to do here is set the lower inode. ++ */ ++ struct inode *inode; ++ inode = unionfs_lower_inode(old_dentry->d_inode); ++ igrab(inode); ++ unionfs_set_lower_inode_idx(new_dentry->d_inode, ++ dbstart(new_dentry), ++ inode); ++ } ++ } ++ /* if all of this renaming succeeded, update our times */ ++ unionfs_copy_attr_times(old_dentry->d_inode); ++ unionfs_copy_attr_times(new_dentry->d_inode); ++ unionfs_check_inode(old_dir); ++ unionfs_check_inode(new_dir); ++ unionfs_check_dentry(old_dentry); ++ unionfs_check_dentry(new_dentry); ++ ++out: ++ if (err) /* clear the new_dentry stuff created */ ++ d_drop(new_dentry); ++ unionfs_unlock_dentry(new_dentry); ++ unionfs_unlock_dentry(old_dentry); ++ unionfs_read_unlock(old_dentry->d_sb); ++ return err; ++} +diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c +new file mode 100644 +index 0000000..dd45e39 +--- /dev/null ++++ b/fs/unionfs/sioq.c +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2006-2008 Erez Zadok ++ * Copyright (c) 2006 Charles P. Wright ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006 Junjiro Okajima ++ * Copyright (c) 2006 David P. Quigley ++ * Copyright (c) 2006-2008 Stony Brook University ++ * Copyright (c) 2006-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Super-user IO work Queue - sometimes we need to perform actions which ++ * would fail due to the unix permissions on the parent directory (e.g., ++ * rmdir a directory which appears empty, but in reality contains ++ * whiteouts). ++ */ ++ ++static struct workqueue_struct *superio_workqueue; ++ ++int __init init_sioq(void) ++{ ++ int err; ++ ++ superio_workqueue = create_workqueue("unionfs_siod"); ++ if (!IS_ERR(superio_workqueue)) ++ return 0; ++ ++ err = PTR_ERR(superio_workqueue); ++ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err); ++ superio_workqueue = NULL; ++ return err; ++} ++ ++void stop_sioq(void) ++{ ++ if (superio_workqueue) ++ destroy_workqueue(superio_workqueue); ++} ++ ++void run_sioq(work_func_t func, struct sioq_args *args) ++{ ++ INIT_WORK(&args->work, func); ++ ++ init_completion(&args->comp); ++ while (!queue_work(superio_workqueue, &args->work)) { ++ /* TODO: do accounting if needed */ ++ schedule(); ++ } ++ wait_for_completion(&args->comp); ++} ++ ++void __unionfs_create(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct create_args *c = &args->create; ++ ++ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd); ++ complete(&args->comp); ++} ++ ++void __unionfs_mkdir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mkdir_args *m = &args->mkdir; ++ ++ args->err = vfs_mkdir(m->parent, m->dentry, m->mode); ++ complete(&args->comp); ++} ++ ++void __unionfs_mknod(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mknod_args *m = &args->mknod; ++ ++ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev); ++ complete(&args->comp); ++} ++ ++void __unionfs_symlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct symlink_args *s = &args->symlink; ++ ++ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf); ++ complete(&args->comp); ++} ++ ++void __unionfs_unlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct unlink_args *u = &args->unlink; ++ ++ args->err = vfs_unlink(u->parent, u->dentry); ++ complete(&args->comp); ++} +diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h +new file mode 100644 +index 0000000..679a0df +--- /dev/null ++++ b/fs/unionfs/sioq.h +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2006-2008 Erez Zadok ++ * Copyright (c) 2006 Charles P. Wright ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006 Junjiro Okajima ++ * Copyright (c) 2006 David P. Quigley ++ * Copyright (c) 2006-2008 Stony Brook University ++ * Copyright (c) 2006-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _SIOQ_H ++#define _SIOQ_H ++ ++struct deletewh_args { ++ struct unionfs_dir_state *namelist; ++ struct dentry *dentry; ++ int bindex; ++}; ++ ++struct is_opaque_args { ++ struct dentry *dentry; ++}; ++ ++struct create_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ struct nameidata *nd; ++}; ++ ++struct mkdir_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++}; ++ ++struct mknod_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ dev_t dev; ++}; ++ ++struct symlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ char *symbuf; ++}; ++ ++struct unlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++}; ++ ++ ++struct sioq_args { ++ struct completion comp; ++ struct work_struct work; ++ int err; ++ void *ret; ++ ++ union { ++ struct deletewh_args deletewh; ++ struct is_opaque_args is_opaque; ++ struct create_args create; ++ struct mkdir_args mkdir; ++ struct mknod_args mknod; ++ struct symlink_args symlink; ++ struct unlink_args unlink; ++ }; ++}; ++ ++/* Extern definitions for SIOQ functions */ ++extern int __init init_sioq(void); ++extern void stop_sioq(void); ++extern void run_sioq(work_func_t func, struct sioq_args *args); ++ ++/* Extern definitions for our privilege escalation helpers */ ++extern void __unionfs_create(struct work_struct *work); ++extern void __unionfs_mkdir(struct work_struct *work); ++extern void __unionfs_mknod(struct work_struct *work); ++extern void __unionfs_symlink(struct work_struct *work); ++extern void __unionfs_unlink(struct work_struct *work); ++extern void __delete_whiteouts(struct work_struct *work); ++extern void __is_opaque_dir(struct work_struct *work); ++ ++#endif /* not _SIOQ_H */ +diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c +new file mode 100644 +index 0000000..8747d20 +--- /dev/null ++++ b/fs/unionfs/subr.c +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * returns the right n_link value based on the inode type ++ */ ++int unionfs_get_nlinks(const struct inode *inode) ++{ ++ /* don't bother to do all the work since we're unlinked */ ++ if (inode->i_nlink == 0) ++ return 0; ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return unionfs_lower_inode(inode)->i_nlink; ++ ++ /* ++ * For directories, we return 1. The only place that could cares ++ * about links is readdir, and there's d_type there so even that ++ * doesn't matter. ++ */ ++ return 1; ++} ++ ++/* copy a/m/ctime from the lower branch with the newest times */ ++void unionfs_copy_attr_times(struct inode *upper) ++{ ++ int bindex; ++ struct inode *lower; ++ ++ if (!upper) ++ return; ++ if (ibstart(upper) < 0) { ++#ifdef CONFIG_UNION_FS_DEBUG ++ WARN_ON(ibstart(upper) < 0); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ return; ++ } ++ for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) { ++ lower = unionfs_lower_inode_idx(upper, bindex); ++ if (!lower) ++ continue; /* not all lower dir objects may exist */ ++ if (unlikely(timespec_compare(&upper->i_mtime, ++ &lower->i_mtime) < 0)) ++ upper->i_mtime = lower->i_mtime; ++ if (unlikely(timespec_compare(&upper->i_ctime, ++ &lower->i_ctime) < 0)) ++ upper->i_ctime = lower->i_ctime; ++ if (unlikely(timespec_compare(&upper->i_atime, ++ &lower->i_atime) < 0)) ++ upper->i_atime = lower->i_atime; ++ } ++} ++ ++/* ++ * A unionfs/fanout version of fsstack_copy_attr_all. Uses a ++ * unionfs_get_nlinks to properly calcluate the number of links to a file. ++ * Also, copies the max() of all a/m/ctimes for all lower inodes (which is ++ * important if the lower inode is a directory type) ++ */ ++void unionfs_copy_attr_all(struct inode *dest, ++ const struct inode *src) ++{ ++ dest->i_mode = src->i_mode; ++ dest->i_uid = src->i_uid; ++ dest->i_gid = src->i_gid; ++ dest->i_rdev = src->i_rdev; ++ ++ unionfs_copy_attr_times(dest); ++ ++ dest->i_blkbits = src->i_blkbits; ++ dest->i_flags = src->i_flags; ++ ++ /* ++ * Update the nlinks AFTER updating the above fields, because the ++ * get_links callback may depend on them. ++ */ ++ dest->i_nlink = unionfs_get_nlinks(dest); ++} +diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c +new file mode 100644 +index 0000000..e774ef3 +--- /dev/null ++++ b/fs/unionfs/super.c +@@ -0,0 +1,1042 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * The inode cache is used with alloc_inode for both our inode info and the ++ * vfs inode. ++ */ ++static struct kmem_cache *unionfs_inode_cachep; ++ ++struct inode *unionfs_iget(struct super_block *sb, unsigned long ino) ++{ ++ int size; ++ struct unionfs_inode_info *info; ++ struct inode *inode; ++ ++ inode = iget_locked(sb, ino); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ info = UNIONFS_I(inode); ++ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ info->bstart = -1; ++ info->bend = -1; ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(inode->i_sb)->generation)); ++ spin_lock_init(&info->rdlock); ++ info->rdcount = 1; ++ info->hashsize = -1; ++ INIT_LIST_HEAD(&info->readdircache); ++ ++ size = sbmax(inode->i_sb) * sizeof(struct inode *); ++ info->lower_inodes = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!info->lower_inodes)) { ++ printk(KERN_CRIT "unionfs: no kernel memory when allocating " ++ "lower-pointer array!\n"); ++ iget_failed(inode); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ inode->i_version++; ++ inode->i_op = &unionfs_main_iops; ++ inode->i_fop = &unionfs_main_fops; ++ ++ inode->i_mapping->a_ops = &unionfs_aops; ++ ++ /* ++ * reset times so unionfs_copy_attr_all can keep out time invariants ++ * right (upper inode time being the max of all lower ones). ++ */ ++ inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0; ++ inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0; ++ inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0; ++ unlock_new_inode(inode); ++ return inode; ++} ++ ++/* ++ * we now define delete_inode, because there are two VFS paths that may ++ * destroy an inode: one of them calls clear inode before doing everything ++ * else that's needed, and the other is fine. This way we truncate the inode ++ * size (and its pages) and then clear our own inode, which will do an iput ++ * on our and the lower inode. ++ * ++ * No need to lock sb info's rwsem. ++ */ ++static void unionfs_delete_inode(struct inode *inode) ++{ ++#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) ++ spin_lock(&inode->i_lock); ++#endif ++ i_size_write(inode, 0); /* every f/s seems to do that */ ++#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) ++ spin_unlock(&inode->i_lock); ++#endif ++ ++ if (inode->i_data.nrpages) ++ truncate_inode_pages(&inode->i_data, 0); ++ ++ clear_inode(inode); ++} ++ ++/* ++ * final actions when unmounting a file system ++ * ++ * No need to lock rwsem. ++ */ ++static void unionfs_put_super(struct super_block *sb) ++{ ++ int bindex, bstart, bend; ++ struct unionfs_sb_info *spd; ++ int leaks = 0; ++ ++ spd = UNIONFS_SB(sb); ++ if (!spd) ++ return; ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ /* Make sure we have no leaks of branchget/branchput. */ ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (unlikely(branch_count(sb, bindex) != 0)) { ++ printk(KERN_CRIT ++ "unionfs: branch %d has %d references left!\n", ++ bindex, branch_count(sb, bindex)); ++ leaks = 1; ++ } ++ BUG_ON(leaks != 0); ++ ++ /* decrement lower super references */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct super_block *s; ++ s = unionfs_lower_super_idx(sb, bindex); ++ unionfs_set_lower_super_idx(sb, bindex, NULL); ++ atomic_dec(&s->s_active); ++ } ++ ++ kfree(spd->dev_name); ++ kfree(spd->data); ++ kfree(spd); ++ sb->s_fs_info = NULL; ++} ++ ++/* ++ * Since people use this to answer the "How big of a file can I write?" ++ * question, we report the size of the highest priority branch as the size of ++ * the union. ++ */ ++static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err = 0; ++ struct super_block *sb; ++ struct dentry *lower_dentry; ++ ++ sb = dentry->d_sb; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(sb->s_root); ++ err = vfs_statfs(lower_dentry, buf); ++ ++ /* set return buf to our f/s to avoid confusing user-level utils */ ++ buf->f_type = UNIONFS_SUPER_MAGIC; ++ /* ++ * Our maximum file name can is shorter by a few bytes because every ++ * file name could potentially be whited-out. ++ * ++ * XXX: this restriction goes away with ODF. ++ */ ++ unionfs_set_max_namelen(&buf->f_namelen); ++ ++ /* ++ * reset two fields to avoid confusing user-land. ++ * XXX: is this still necessary? ++ */ ++ memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t)); ++ memset(&buf->f_spare, 0, sizeof(buf->f_spare)); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(sb); ++ return err; ++} ++ ++/* handle mode changing during remount */ ++static noinline_for_stack int do_remount_mode_option( ++ char *optarg, ++ int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths) ++{ ++ int err = -EINVAL; ++ int perms, idx; ++ char *modename = strchr(optarg, '='); ++ struct nameidata nd; ++ ++ /* by now, optarg contains the branch name */ ++ if (!*optarg) { ++ printk(KERN_ERR ++ "unionfs: no branch specified for mode change\n"); ++ goto out; ++ } ++ if (!modename) { ++ printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n", ++ optarg); ++ goto out; ++ } ++ *modename++ = '\0'; ++ err = parse_branch_mode(modename, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n", ++ modename, optarg); ++ goto out; ++ } ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ err = -ENOENT; /* err may have been reset above */ ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ goto out; ++ } ++ /* check/change mode for existing branch */ ++ /* we don't warn if perms==branchperms */ ++ new_data[idx].branchperms = perms; ++ err = 0; ++out: ++ return err; ++} ++ ++/* handle branch deletion during remount */ ++static noinline_for_stack int do_remount_del_option( ++ char *optarg, int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths) ++{ ++ int err = -EINVAL; ++ int idx; ++ struct nameidata nd; ++ ++ /* optarg contains the branch name to delete */ ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ err = -ENOENT; ++ goto out; ++ } ++ /* check if there are any open files on the branch to be deleted */ ++ if (atomic_read(&new_data[idx].open_files) > 0) { ++ err = -EBUSY; ++ goto out; ++ } ++ ++ /* ++ * Now we have to delete the branch. First, release any handles it ++ * has. Then, move the remaining array indexes past "idx" in ++ * new_data and new_lower_paths one to the left. Finally, adjust ++ * cur_branches. ++ */ ++ path_put(&new_lower_paths[idx]); ++ ++ if (idx < cur_branches - 1) { ++ /* if idx==cur_branches-1, we delete last branch: easy */ ++ memmove(&new_data[idx], &new_data[idx+1], ++ (cur_branches - 1 - idx) * ++ sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1], ++ (cur_branches - 1 - idx) * sizeof(struct path)); ++ } ++ ++ err = 0; ++out: ++ return err; ++} ++ ++/* handle branch insertion during remount */ ++static noinline_for_stack int do_remount_add_option( ++ char *optarg, int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths, ++ int *high_branch_id) ++{ ++ int err = -EINVAL; ++ int perms; ++ int idx = 0; /* default: insert at beginning */ ++ char *new_branch , *modename = NULL; ++ struct nameidata nd; ++ ++ /* ++ * optarg can be of several forms: ++ * ++ * /bar:/foo insert /foo before /bar ++ * /bar:/foo=ro insert /foo in ro mode before /bar ++ * /foo insert /foo in the beginning (prepend) ++ * :/foo insert /foo at the end (append) ++ */ ++ if (*optarg == ':') { /* append? */ ++ new_branch = optarg + 1; /* skip ':' */ ++ idx = cur_branches; ++ goto found_insertion_point; ++ } ++ new_branch = strchr(optarg, ':'); ++ if (!new_branch) { /* prepend? */ ++ new_branch = optarg; ++ goto found_insertion_point; ++ } ++ *new_branch++ = '\0'; /* holds path+mode of new branch */ ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ err = -ENOENT; ++ goto out; ++ } ++ ++ /* ++ * At this point idx will hold the index where the new branch should ++ * be inserted before. ++ */ ++found_insertion_point: ++ /* find the mode for the new branch */ ++ if (new_branch) ++ modename = strchr(new_branch, '='); ++ if (modename) ++ *modename++ = '\0'; ++ if (!new_branch || !*new_branch) { ++ printk(KERN_ERR "unionfs: null new branch\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ err = parse_branch_mode(modename, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for " ++ "branch \"%s\"\n", modename, new_branch); ++ goto out; ++ } ++ err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ new_branch, err); ++ goto out; ++ } ++ /* ++ * It's probably safe to check_mode the new branch to insert. Note: ++ * we don't allow inserting branches which are unionfs's by ++ * themselves (check_branch returns EINVAL in that case). This is ++ * because this code base doesn't support stacking unionfs: the ODF ++ * code base supports that correctly. ++ */ ++ err = check_branch(&nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower directory " ++ "\"%s\" is not a valid branch\n", optarg); ++ path_put(&nd.path); ++ goto out; ++ } ++ ++ /* ++ * Now we have to insert the new branch. But first, move the bits ++ * to make space for the new branch, if needed. Finally, adjust ++ * cur_branches. ++ * We don't release nd here; it's kept until umount/remount. ++ */ ++ if (idx < cur_branches) { ++ /* if idx==cur_branches, we append: easy */ ++ memmove(&new_data[idx+1], &new_data[idx], ++ (cur_branches - idx) * sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx], ++ (cur_branches - idx) * sizeof(struct path)); ++ } ++ new_lower_paths[idx].dentry = nd.path.dentry; ++ new_lower_paths[idx].mnt = nd.path.mnt; ++ ++ new_data[idx].sb = nd.path.dentry->d_sb; ++ atomic_set(&new_data[idx].open_files, 0); ++ new_data[idx].branchperms = perms; ++ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */ ++ ++ err = 0; ++out: ++ return err; ++} ++ ++ ++/* ++ * Support branch management options on remount. ++ * ++ * See Documentation/filesystems/unionfs/ for details. ++ * ++ * @flags: numeric mount options ++ * @options: mount options string ++ * ++ * This function can rearrange a mounted union dynamically, adding and ++ * removing branches, including changing branch modes. Clearly this has to ++ * be done safely and atomically. Luckily, the VFS already calls this ++ * function with lock_super(sb) and lock_kernel() held, preventing ++ * concurrent mixing of new mounts, remounts, and unmounts. Moreover, ++ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb) ++ * to purge dentries/inodes from our superblock, and also called ++ * fsync_super(sb) to purge any dirty pages. So we're good. ++ * ++ * XXX: however, our remount code may also need to invalidate mapped pages ++ * so as to force them to be re-gotten from the (newly reconfigured) lower ++ * branches. This has to wait for proper mmap and cache coherency support ++ * in the VFS. ++ * ++ */ ++static int unionfs_remount_fs(struct super_block *sb, int *flags, ++ char *options) ++{ ++ int err = 0; ++ int i; ++ char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */ ++ char *optname; ++ int cur_branches = 0; /* no. of current branches */ ++ int new_branches = 0; /* no. of branches actually left in the end */ ++ int add_branches; /* est. no. of branches to add */ ++ int del_branches; /* est. no. of branches to del */ ++ int max_branches; /* max possible no. of branches */ ++ struct unionfs_data *new_data = NULL, *tmp_data = NULL; ++ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL; ++ struct inode **new_lower_inodes = NULL; ++ int new_high_branch_id; /* new high branch ID */ ++ int size; /* memory allocation size, temp var */ ++ int old_ibstart, old_ibend; ++ ++ unionfs_write_lock(sb); ++ ++ /* ++ * The VFS will take care of "ro" and "rw" flags, and we can safely ++ * ignore MS_SILENT, but anything else left over is an error. So we ++ * need to check if any other flags may have been passed (none are ++ * allowed/supported as of now). ++ */ ++ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) { ++ printk(KERN_ERR ++ "unionfs: remount flags 0x%x unsupported\n", *flags); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ /* ++ * If 'options' is NULL, it's probably because the user just changed ++ * the union to a "ro" or "rw" and the VFS took care of it. So ++ * nothing to do and we're done. ++ */ ++ if (!options || options[0] == '\0') ++ goto out_error; ++ ++ /* ++ * Find out how many branches we will have in the end, counting ++ * "add" and "del" commands. Copy the "options" string because ++ * strsep modifies the string and we need it later. ++ */ ++ tmp_to_free = kstrdup(options, GFP_KERNEL); ++ optionstmp = tmp_to_free; ++ if (unlikely(!optionstmp)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ cur_branches = sbmax(sb); /* current no. branches */ ++ new_branches = sbmax(sb); ++ del_branches = 0; ++ add_branches = 0; ++ new_high_branch_id = sbhbid(sb); /* save current high_branch_id */ ++ while ((optname = strsep(&optionstmp, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ ++ if (!strcmp("add", optname)) ++ add_branches++; ++ else if (!strcmp("del", optname)) ++ del_branches++; ++ } ++ kfree(tmp_to_free); ++ /* after all changes, will we have at least one branch left? */ ++ if ((new_branches + add_branches - del_branches) < 1) { ++ printk(KERN_ERR ++ "unionfs: no branches left after remount\n"); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ /* ++ * Since we haven't actually parsed all the add/del options, nor ++ * have we checked them for errors, we don't know for sure how many ++ * branches we will have after all changes have taken place. In ++ * fact, the total number of branches left could be less than what ++ * we have now. So we need to allocate space for a temporary ++ * placeholder that is at least as large as the maximum number of ++ * branches we *could* have, which is the current number plus all ++ * the additions. Once we're done with these temp placeholders, we ++ * may have to re-allocate the final size, copy over from the temp, ++ * and then free the temps (done near the end of this function). ++ */ ++ max_branches = cur_branches + add_branches; ++ /* allocate space for new pointers to lower dentry */ ++ tmp_data = kcalloc(max_branches, ++ sizeof(struct unionfs_data), GFP_KERNEL); ++ if (unlikely(!tmp_data)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ /* allocate space for new pointers to lower paths */ ++ tmp_lower_paths = kcalloc(max_branches, ++ sizeof(struct path), GFP_KERNEL); ++ if (unlikely(!tmp_lower_paths)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ /* copy current info into new placeholders, incrementing refcnts */ ++ memcpy(tmp_data, UNIONFS_SB(sb)->data, ++ cur_branches * sizeof(struct unionfs_data)); ++ memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths, ++ cur_branches * sizeof(struct path)); ++ for (i = 0; i < cur_branches; i++) ++ path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */ ++ ++ /******************************************************************* ++ * For each branch command, do path_lookup on the requested branch, ++ * and apply the change to a temp branch list. To handle errors, we ++ * already dup'ed the old arrays (above), and increased the refcnts ++ * on various f/s objects. So now we can do all the path_lookups ++ * and branch-management commands on the new arrays. If it fail mid ++ * way, we free the tmp arrays and *put all objects. If we succeed, ++ * then we free old arrays and *put its objects, and then replace ++ * the arrays with the new tmp list (we may have to re-allocate the ++ * memory because the temp lists could have been larger than what we ++ * actually needed). ++ *******************************************************************/ ++ ++ while ((optname = strsep(&options, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ /* ++ * At this stage optname holds a comma-delimited option, but ++ * without the commas. Next, we need to break the string on ++ * the '=' symbol to separate CMD=ARG, where ARG itself can ++ * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode", ++ * KEY is "/foo", and VAL is "rw". ++ */ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ /* incgen remount option (instead of old ioctl) */ ++ if (!strcmp("incgen", optname)) { ++ err = 0; ++ goto out_no_change; ++ } ++ ++ /* ++ * All of our options take an argument now. (Insert ones ++ * that don't above this check.) So at this stage optname ++ * contains the CMD part and optarg contains the ARG part. ++ */ ++ if (!optarg || !*optarg) { ++ printk(KERN_ERR "unionfs: all remount options require " ++ "an argument (%s)\n", optname); ++ err = -EINVAL; ++ goto out_release; ++ } ++ ++ if (!strcmp("add", optname)) { ++ err = do_remount_add_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths, ++ &new_high_branch_id); ++ if (err) ++ goto out_release; ++ new_branches++; ++ if (new_branches > UNIONFS_MAX_BRANCHES) { ++ printk(KERN_ERR "unionfs: command exceeds " ++ "%d branches\n", UNIONFS_MAX_BRANCHES); ++ err = -E2BIG; ++ goto out_release; ++ } ++ continue; ++ } ++ if (!strcmp("del", optname)) { ++ err = do_remount_del_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths); ++ if (err) ++ goto out_release; ++ new_branches--; ++ continue; ++ } ++ if (!strcmp("mode", optname)) { ++ err = do_remount_mode_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths); ++ if (err) ++ goto out_release; ++ continue; ++ } ++ ++ /* ++ * When you use "mount -o remount,ro", mount(8) will ++ * reportedly pass the original dirs= string from ++ * /proc/mounts. So for now, we have to ignore dirs= and ++ * not consider it an error, unless we want to allow users ++ * to pass dirs= in remount. Note that to allow the VFS to ++ * actually process the ro/rw remount options, we have to ++ * return 0 from this function. ++ */ ++ if (!strcmp("dirs", optname)) { ++ printk(KERN_WARNING ++ "unionfs: remount ignoring option \"%s\"\n", ++ optname); ++ continue; ++ } ++ ++ err = -EINVAL; ++ printk(KERN_ERR ++ "unionfs: unrecognized option \"%s\"\n", optname); ++ goto out_release; ++ } ++ ++out_no_change: ++ ++ /****************************************************************** ++ * WE'RE ALMOST DONE: check if leftmost branch might be read-only, ++ * see if we need to allocate a small-sized new vector, copy the ++ * vectors to their correct place, release the refcnt of the older ++ * ones, and return. Also handle invalidating any pages that will ++ * have to be re-read. ++ *******************************************************************/ ++ ++ if (!(tmp_data[0].branchperms & MAY_WRITE)) { ++ printk(KERN_ERR "unionfs: leftmost branch cannot be read-only " ++ "(use \"remount,ro\" to create a read-only union)\n"); ++ err = -EINVAL; ++ goto out_release; ++ } ++ ++ /* (re)allocate space for new pointers to lower dentry */ ++ size = new_branches * sizeof(struct unionfs_data); ++ new_data = krealloc(tmp_data, size, GFP_KERNEL); ++ if (unlikely(!new_data)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* allocate space for new pointers to lower paths */ ++ size = new_branches * sizeof(struct path); ++ new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL); ++ if (unlikely(!new_lower_paths)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* allocate space for new pointers to lower inodes */ ++ new_lower_inodes = kcalloc(new_branches, ++ sizeof(struct inode *), GFP_KERNEL); ++ if (unlikely(!new_lower_inodes)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* ++ * OK, just before we actually put the new set of branches in place, ++ * we need to ensure that our own f/s has no dirty objects left. ++ * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and ++ * fsync_super(sb), taking care of dentries, inodes, and dirty ++ * pages. So all that's left is for us to invalidate any leftover ++ * (non-dirty) pages to ensure that they will be re-read from the ++ * new lower branches (and to support mmap). ++ */ ++ ++ /* ++ * Once we finish the remounting successfully, our superblock ++ * generation number will have increased. This will be detected by ++ * our dentry-revalidation code upon subsequent f/s operations ++ * through unionfs. The revalidation code will rebuild the union of ++ * lower inodes for a given unionfs inode and invalidate any pages ++ * of such "stale" inodes (by calling our purge_inode_data ++ * function). This revalidation will happen lazily and ++ * incrementally, as users perform operations on cached inodes. We ++ * would like to encourage this revalidation to happen sooner if ++ * possible, so we like to try to invalidate as many other pages in ++ * our superblock as we can. We used to call drop_pagecache_sb() or ++ * a variant thereof, but either method was racy (drop_caches alone ++ * is known to be racy). So now we let the revalidation happen on a ++ * per file basis in ->d_revalidate. ++ */ ++ ++ /* grab new lower super references; release old ones */ ++ for (i = 0; i < new_branches; i++) ++ atomic_inc(&new_data[i].sb->s_active); ++ for (i = 0; i < sbmax(sb); i++) ++ atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active); ++ ++ /* copy new vectors into their correct place */ ++ tmp_data = UNIONFS_SB(sb)->data; ++ UNIONFS_SB(sb)->data = new_data; ++ new_data = NULL; /* so don't free good pointers below */ ++ tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths; ++ UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths; ++ new_lower_paths = NULL; /* so don't free good pointers below */ ++ ++ /* update our unionfs_sb_info and root dentry index of last branch */ ++ i = sbmax(sb); /* save no. of branches to release at end */ ++ sbend(sb) = new_branches - 1; ++ dbend(sb->s_root) = new_branches - 1; ++ old_ibstart = ibstart(sb->s_root->d_inode); ++ old_ibend = ibend(sb->s_root->d_inode); ++ ibend(sb->s_root->d_inode) = new_branches - 1; ++ UNIONFS_D(sb->s_root)->bcount = new_branches; ++ new_branches = i; /* no. of branches to release below */ ++ ++ /* ++ * Update lower inodes: 3 steps ++ * 1. grab ref on all new lower inodes ++ */ ++ for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) { ++ struct dentry *lower_dentry = ++ unionfs_lower_dentry_idx(sb->s_root, i); ++ igrab(lower_dentry->d_inode); ++ new_lower_inodes[i] = lower_dentry->d_inode; ++ } ++ /* 2. release reference on all older lower inodes */ ++ iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true); ++ /* 3. update root dentry's inode to new lower_inodes array */ ++ UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes; ++ new_lower_inodes = NULL; ++ ++ /* maxbytes may have changed */ ++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; ++ /* update high branch ID */ ++ sbhbid(sb) = new_high_branch_id; ++ ++ /* update our sb->generation for revalidating objects */ ++ i = atomic_inc_return(&UNIONFS_SB(sb)->generation); ++ atomic_set(&UNIONFS_D(sb->s_root)->generation, i); ++ atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i); ++ if (!(*flags & MS_SILENT)) ++ pr_info("unionfs: %s: new generation number %d\n", ++ UNIONFS_SB(sb)->dev_name, i); ++ /* finally, update the root dentry's times */ ++ unionfs_copy_attr_times(sb->s_root->d_inode); ++ err = 0; /* reset to success */ ++ ++ /* ++ * The code above falls through to the next label, and releases the ++ * refcnts of the older ones (stored in tmp_*): if we fell through ++ * here, it means success. However, if we jump directly to this ++ * label from any error above, then an error occurred after we ++ * grabbed various refcnts, and so we have to release the ++ * temporarily constructed structures. ++ */ ++out_release: ++ /* no need to cleanup/release anything in tmp_data */ ++ if (tmp_lower_paths) ++ for (i = 0; i < new_branches; i++) ++ path_put(&tmp_lower_paths[i]); ++out_free: ++ kfree(tmp_lower_paths); ++ kfree(tmp_data); ++ kfree(new_lower_paths); ++ kfree(new_data); ++ kfree(new_lower_inodes); ++out_error: ++ unionfs_check_dentry(sb->s_root); ++ unionfs_write_unlock(sb); ++ return err; ++} ++ ++/* ++ * Called by iput() when the inode reference count reached zero ++ * and the inode is not hashed anywhere. Used to clear anything ++ * that needs to be, before the inode is completely destroyed and put ++ * on the inode free list. ++ * ++ * No need to lock sb info's rwsem. ++ */ ++static void unionfs_clear_inode(struct inode *inode) ++{ ++ int bindex, bstart, bend; ++ struct inode *lower_inode; ++ struct list_head *pos, *n; ++ struct unionfs_dir_state *rdstate; ++ ++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { ++ rdstate = list_entry(pos, struct unionfs_dir_state, cache); ++ list_del(&rdstate->cache); ++ free_rdstate(rdstate); ++ } ++ ++ /* ++ * Decrement a reference to a lower_inode, which was incremented ++ * by our read_inode when it was created initially. ++ */ ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(lower_inode); ++ lockdep_on(); ++ } ++ } ++ ++ kfree(UNIONFS_I(inode)->lower_inodes); ++ UNIONFS_I(inode)->lower_inodes = NULL; ++} ++ ++static struct inode *unionfs_alloc_inode(struct super_block *sb) ++{ ++ struct unionfs_inode_info *i; ++ ++ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL); ++ if (unlikely(!i)) ++ return NULL; ++ ++ /* memset everything up to the inode to 0 */ ++ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ ++ i->vfs_inode.i_version = 1; ++ return &i->vfs_inode; ++} ++ ++static void unionfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode)); ++} ++ ++/* unionfs inode cache constructor */ ++static void init_once(void *obj) ++{ ++ struct unionfs_inode_info *i = obj; ++ ++ inode_init_once(&i->vfs_inode); ++} ++ ++int unionfs_init_inode_cache(void) ++{ ++ int err = 0; ++ ++ unionfs_inode_cachep = ++ kmem_cache_create("unionfs_inode_cache", ++ sizeof(struct unionfs_inode_info), 0, ++ SLAB_RECLAIM_ACCOUNT, init_once); ++ if (unlikely(!unionfs_inode_cachep)) ++ err = -ENOMEM; ++ return err; ++} ++ ++/* unionfs inode cache destructor */ ++void unionfs_destroy_inode_cache(void) ++{ ++ if (unionfs_inode_cachep) ++ kmem_cache_destroy(unionfs_inode_cachep); ++} ++ ++/* ++ * Called when we have a dirty inode, right here we only throw out ++ * parts of our readdir list that are too old. ++ * ++ * No need to grab sb info's rwsem. ++ */ ++static int unionfs_write_inode(struct inode *inode, int sync) ++{ ++ struct list_head *pos, *n; ++ struct unionfs_dir_state *rdstate; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { ++ rdstate = list_entry(pos, struct unionfs_dir_state, cache); ++ /* We keep this list in LRU order. */ ++ if ((rdstate->access + RDCACHE_JIFFIES) > jiffies) ++ break; ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&rdstate->cache); ++ free_rdstate(rdstate); ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ ++ return 0; ++} ++ ++/* ++ * Used only in nfs, to kill any pending RPC tasks, so that subsequent ++ * code can actually succeed and won't leave tasks that need handling. ++ */ ++static void unionfs_umount_begin(struct super_block *sb) ++{ ++ struct super_block *lower_sb; ++ int bindex, bstart, bend; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_sb = unionfs_lower_super_idx(sb, bindex); ++ ++ if (lower_sb && lower_sb->s_op && ++ lower_sb->s_op->umount_begin) ++ lower_sb->s_op->umount_begin(lower_sb); ++ } ++ ++ unionfs_read_unlock(sb); ++} ++ ++static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ struct super_block *sb = mnt->mnt_sb; ++ int ret = 0; ++ char *tmp_page; ++ char *path; ++ int bindex, bstart, bend; ++ int perms; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ ++ unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD); ++ ++ tmp_page = (char *) __get_free_page(GFP_KERNEL); ++ if (unlikely(!tmp_page)) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ seq_printf(m, ",dirs="); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct path p; ++ p.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex); ++ p.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex); ++ path = d_path(&p, tmp_page, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ ret = PTR_ERR(path); ++ goto out; ++ } ++ ++ perms = branchperms(sb, bindex); ++ ++ seq_printf(m, "%s=%s", path, ++ perms & MAY_WRITE ? "rw" : "ro"); ++ if (bindex != bend) ++ seq_printf(m, ":"); ++ } ++ ++out: ++ free_page((unsigned long) tmp_page); ++ ++ unionfs_unlock_dentry(sb->s_root); ++ ++ unionfs_read_unlock(sb); ++ ++ return ret; ++} ++ ++struct super_operations unionfs_sops = { ++ .delete_inode = unionfs_delete_inode, ++ .put_super = unionfs_put_super, ++ .statfs = unionfs_statfs, ++ .remount_fs = unionfs_remount_fs, ++ .clear_inode = unionfs_clear_inode, ++ .umount_begin = unionfs_umount_begin, ++ .show_options = unionfs_show_options, ++ .write_inode = unionfs_write_inode, ++ .alloc_inode = unionfs_alloc_inode, ++ .destroy_inode = unionfs_destroy_inode, ++}; +diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h +new file mode 100644 +index 0000000..1b9c3f7 +--- /dev/null ++++ b/fs/unionfs/union.h +@@ -0,0 +1,607 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _UNION_H_ ++#define _UNION_H_ ++ ++#include <linux/dcache.h> ++#include <linux/file.h> ++#include <linux/list.h> ++#include <linux/fs.h> ++#include <linux/mm.h> ++#include <linux/module.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/page-flags.h> ++#include <linux/pagemap.h> ++#include <linux/poll.h> ++#include <linux/security.h> ++#include <linux/seq_file.h> ++#include <linux/slab.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/statfs.h> ++#include <linux/string.h> ++#include <linux/vmalloc.h> ++#include <linux/writeback.h> ++#include <linux/buffer_head.h> ++#include <linux/xattr.h> ++#include <linux/fs_stack.h> ++#include <linux/magic.h> ++#include <linux/log2.h> ++#include <linux/poison.h> ++#include <linux/mman.h> ++#include <linux/backing-dev.h> ++#include <linux/splice.h> ++ ++#include <asm/system.h> ++ ++#include <linux/union_fs.h> ++ ++/* the file system name */ ++#define UNIONFS_NAME "unionfs" ++ ++/* unionfs root inode number */ ++#define UNIONFS_ROOT_INO 1 ++ ++/* number of times we try to get a unique temporary file name */ ++#define GET_TMPNAM_MAX_RETRY 5 ++ ++/* maximum number of branches we support, to avoid memory blowup */ ++#define UNIONFS_MAX_BRANCHES 128 ++ ++/* minimum time (seconds) required for time-based cache-coherency */ ++#define UNIONFS_MIN_CC_TIME 3 ++ ++/* Operations vectors defined in specific files. */ ++extern struct file_operations unionfs_main_fops; ++extern struct file_operations unionfs_dir_fops; ++extern struct inode_operations unionfs_main_iops; ++extern struct inode_operations unionfs_dir_iops; ++extern struct inode_operations unionfs_symlink_iops; ++extern struct super_operations unionfs_sops; ++extern struct dentry_operations unionfs_dops; ++extern struct address_space_operations unionfs_aops, unionfs_dummy_aops; ++extern struct vm_operations_struct unionfs_vm_ops; ++ ++/* How long should an entry be allowed to persist */ ++#define RDCACHE_JIFFIES (5*HZ) ++ ++/* compatibility with Real-Time patches */ ++#ifdef CONFIG_PREEMPT_RT ++# define unionfs_rw_semaphore compat_rw_semaphore ++#else /* not CONFIG_PREEMPT_RT */ ++# define unionfs_rw_semaphore rw_semaphore ++#endif /* not CONFIG_PREEMPT_RT */ ++ ++/* file private data. */ ++struct unionfs_file_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ ++ struct unionfs_dir_state *rdstate; ++ struct file **lower_files; ++ int *saved_branch_ids; /* IDs of branches when file was opened */ ++ struct vm_operations_struct *lower_vm_ops; ++ bool wrote_to_file; /* for delayed copyup */ ++}; ++ ++/* unionfs inode data in memory */ ++struct unionfs_inode_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ /* Stuff for readdir over NFS. */ ++ spinlock_t rdlock; ++ struct list_head readdircache; ++ int rdcount; ++ int hashsize; ++ int cookie; ++ ++ /* The lower inodes */ ++ struct inode **lower_inodes; ++ ++ struct inode vfs_inode; ++}; ++ ++/* unionfs dentry data in memory */ ++struct unionfs_dentry_info { ++ /* ++ * The semaphore is used to lock the dentry as soon as we get into a ++ * unionfs function from the VFS. Our lock ordering is that children ++ * go before their parents. ++ */ ++ struct mutex lock; ++ int bstart; ++ int bend; ++ int bopaque; ++ int bcount; ++ atomic_t generation; ++ struct path *lower_paths; ++}; ++ ++/* These are the pointers to our various objects. */ ++struct unionfs_data { ++ struct super_block *sb; /* lower super_block */ ++ atomic_t open_files; /* number of open files on branch */ ++ int branchperms; ++ int branch_id; /* unique branch ID at re/mount time */ ++}; ++ ++/* unionfs super-block data in memory */ ++struct unionfs_sb_info { ++ int bend; ++ ++ atomic_t generation; ++ ++ /* ++ * This rwsem is used to make sure that a branch management ++ * operation... ++ * 1) will not begin before all currently in-flight operations ++ * complete. ++ * 2) any new operations do not execute until the currently ++ * running branch management operation completes. ++ * ++ * The write_lock_owner records the PID of the task which grabbed ++ * the rw_sem for writing. If the same task also tries to grab the ++ * read lock, we allow it. This prevents a self-deadlock when ++ * branch-management is used on a pivot_root'ed union, because we ++ * have to ->lookup paths which belong to the same union. ++ */ ++ struct unionfs_rw_semaphore rwsem; ++ pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */ ++ int high_branch_id; /* last unique branch ID given */ ++ char *dev_name; /* to identify different unions in pr_debug */ ++ struct unionfs_data *data; ++}; ++ ++/* ++ * structure for making the linked list of entries by readdir on left branch ++ * to compare with entries on right branch ++ */ ++struct filldir_node { ++ struct list_head file_list; /* list for directory entries */ ++ char *name; /* name entry */ ++ int hash; /* name hash */ ++ int namelen; /* name len since name is not 0 terminated */ ++ ++ /* ++ * we can check for duplicate whiteouts and files in the same branch ++ * in order to return -EIO. ++ */ ++ int bindex; ++ ++ /* is this a whiteout entry? */ ++ int whiteout; ++ ++ /* Inline name, so we don't need to separately kmalloc small ones */ ++ char iname[DNAME_INLINE_LEN_MIN]; ++}; ++ ++/* Directory hash table. */ ++struct unionfs_dir_state { ++ unsigned int cookie; /* the cookie, based off of rdversion */ ++ unsigned int offset; /* The entry we have returned. */ ++ int bindex; ++ loff_t dirpos; /* offset within the lower level directory */ ++ int size; /* How big is the hash table? */ ++ int hashentries; /* How many entries have been inserted? */ ++ unsigned long access; ++ ++ /* This cache list is used when the inode keeps us around. */ ++ struct list_head cache; ++ struct list_head list[0]; ++}; ++ ++/* externs needed for fanout.h or sioq.h */ ++extern int unionfs_get_nlinks(const struct inode *inode); ++extern void unionfs_copy_attr_times(struct inode *upper); ++extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src); ++ ++/* include miscellaneous macros */ ++#include "fanout.h" ++#include "sioq.h" ++ ++/* externs for cache creation/deletion routines */ ++extern void unionfs_destroy_filldir_cache(void); ++extern int unionfs_init_filldir_cache(void); ++extern int unionfs_init_inode_cache(void); ++extern void unionfs_destroy_inode_cache(void); ++extern int unionfs_init_dentry_cache(void); ++extern void unionfs_destroy_dentry_cache(void); ++ ++/* Initialize and free readdir-specific state. */ ++extern int init_rdstate(struct file *file); ++extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, ++ int bindex); ++extern struct unionfs_dir_state *find_rdstate(struct inode *inode, ++ loff_t fpos); ++extern void free_rdstate(struct unionfs_dir_state *state); ++extern int add_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, int bindex, ++ int whiteout); ++extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, ++ int is_whiteout); ++ ++extern struct dentry **alloc_new_dentries(int objs); ++extern struct unionfs_data *alloc_new_data(int objs); ++ ++/* We can only use 32-bits of offset for rdstate --- blech! */ ++#define DIREOF (0xfffff) ++#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */ ++#define MAXRDCOOKIE (0xfff) ++/* Turn an rdstate into an offset. */ ++static inline off_t rdstate2offset(struct unionfs_dir_state *buf) ++{ ++ off_t tmp; ++ ++ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS) ++ | (buf->offset & DIREOF); ++ return tmp; ++} ++ ++/* Macros for locking a super_block. */ ++enum unionfs_super_lock_class { ++ UNIONFS_SMUTEX_NORMAL, ++ UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */ ++ UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */ ++}; ++static inline void unionfs_read_lock(struct super_block *sb, int subclass) ++{ ++ if (UNIONFS_SB(sb)->write_lock_owner && ++ UNIONFS_SB(sb)->write_lock_owner == current->pid) ++ return; ++ down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass); ++} ++static inline void unionfs_read_unlock(struct super_block *sb) ++{ ++ if (UNIONFS_SB(sb)->write_lock_owner && ++ UNIONFS_SB(sb)->write_lock_owner == current->pid) ++ return; ++ up_read(&UNIONFS_SB(sb)->rwsem); ++} ++static inline void unionfs_write_lock(struct super_block *sb) ++{ ++ down_write(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->write_lock_owner = current->pid; ++} ++static inline void unionfs_write_unlock(struct super_block *sb) ++{ ++ up_write(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->write_lock_owner = 0; ++} ++ ++static inline void unionfs_double_lock_dentry(struct dentry *d1, ++ struct dentry *d2) ++{ ++ BUG_ON(d1 == d2); ++ if (d1 < d2) { ++ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_PARENT); ++ } else { ++ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_PARENT); ++ } ++} ++ ++extern int new_dentry_private_data(struct dentry *dentry, int subclass); ++extern int realloc_dentry_private_data(struct dentry *dentry); ++extern void free_dentry_private_data(struct dentry *dentry); ++extern void update_bstart(struct dentry *dentry); ++extern int init_lower_nd(struct nameidata *nd, unsigned int flags); ++extern void release_lower_nd(struct nameidata *nd, int err); ++ ++/* ++ * EXTERNALS: ++ */ ++ ++/* replicates the directory structure up to given dentry in given branch */ ++extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ const char *name, int bindex); ++ ++/* partial lookup */ ++extern int unionfs_partial_lookup(struct dentry *dentry); ++extern struct dentry *unionfs_lookup_full(struct dentry *dentry, ++ struct nameidata *nd_unused, ++ int lookupmode); ++ ++/* copies a file from dbstart to newbindex branch */ ++extern int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int newbindex, loff_t size); ++extern int copyup_named_file(struct inode *dir, struct file *file, ++ char *name, int bstart, int new_bindex, ++ loff_t len); ++/* copies a dentry from dbstart to newbindex branch */ ++extern int copyup_dentry(struct inode *dir, struct dentry *dentry, ++ int bstart, int new_bindex, const char *name, ++ int namelen, struct file **copyup_file, loff_t len); ++/* helper functions for post-copyup actions */ ++extern void unionfs_postcopyup_setmnt(struct dentry *dentry); ++extern void unionfs_postcopyup_release(struct dentry *dentry); ++ ++/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */ ++extern int check_empty(struct dentry *dentry, ++ struct unionfs_dir_state **namelist); ++/* whiteout and opaque directory helpers */ ++extern char *alloc_whname(const char *name, int len); ++extern bool is_whiteout_name(char **namep, int *namelenp); ++extern bool is_validname(const char *name); ++extern struct dentry *lookup_whiteout(const char *name, ++ struct dentry *lower_parent); ++extern struct dentry *find_first_whiteout(struct dentry *dentry); ++extern int unlink_whiteout(struct dentry *wh_dentry); ++extern int check_unlink_whiteout(struct dentry *dentry, ++ struct dentry *lower_dentry, int bindex); ++extern int create_whiteout(struct dentry *dentry, int start); ++extern int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist); ++extern int is_opaque_dir(struct dentry *dentry, int bindex); ++extern int make_dir_opaque(struct dentry *dir, int bindex); ++extern void unionfs_set_max_namelen(long *namelen); ++ ++extern void unionfs_reinterpose(struct dentry *this_dentry); ++extern struct super_block *unionfs_duplicate_super(struct super_block *sb); ++ ++/* Locking functions. */ ++extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl); ++extern int unionfs_getlk(struct file *file, struct file_lock *fl); ++ ++/* Common file operations. */ ++extern int unionfs_file_revalidate(struct file *file, bool willwrite); ++extern int unionfs_file_revalidate_locked(struct file *file, bool willwrite); ++extern int unionfs_open(struct inode *inode, struct file *file); ++extern int unionfs_file_release(struct inode *inode, struct file *file); ++extern int unionfs_flush(struct file *file, fl_owner_t id); ++extern long unionfs_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++extern int unionfs_fsync(struct file *file, struct dentry *dentry, ++ int datasync); ++extern int unionfs_fasync(int fd, struct file *file, int flag); ++ ++/* Inode operations */ ++extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino); ++extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry); ++extern int unionfs_unlink(struct inode *dir, struct dentry *dentry); ++extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++extern bool __unionfs_d_revalidate_one_locked(struct dentry *dentry, ++ struct nameidata *nd, ++ bool willwrite); ++extern bool __unionfs_d_revalidate_chain(struct dentry *dentry, ++ struct nameidata *nd, bool willwrite); ++extern bool is_negative_lower(const struct dentry *dentry); ++extern bool is_newer_lower(const struct dentry *dentry); ++extern void purge_sb_data(struct super_block *sb); ++ ++/* The values for unionfs_interpose's flag. */ ++#define INTERPOSE_DEFAULT 0 ++#define INTERPOSE_LOOKUP 1 ++#define INTERPOSE_REVAL 2 ++#define INTERPOSE_REVAL_NEG 3 ++#define INTERPOSE_PARTIAL 4 ++ ++extern struct dentry *unionfs_interpose(struct dentry *this_dentry, ++ struct super_block *sb, int flag); ++ ++#ifdef CONFIG_UNION_FS_XATTR ++/* Extended attribute functions. */ ++extern void *unionfs_xattr_alloc(size_t size, size_t limit); ++static inline void unionfs_xattr_kfree(const void *p) ++{ ++ kfree(p); ++} ++extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, ++ void *value, size_t size); ++extern int unionfs_removexattr(struct dentry *dentry, const char *name); ++extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list, ++ size_t size); ++extern int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags); ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++/* The root directory is unhashed, but isn't deleted. */ ++static inline int d_deleted(struct dentry *d) ++{ ++ return d_unhashed(d) && (d != d->d_sb->s_root); ++} ++ ++/* unionfs_permission, check if we should bypass error to facilitate copyup */ ++#define IS_COPYUP_ERR(err) ((err) == -EROFS) ++ ++/* unionfs_open, check if we need to copyup the file */ ++#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND) ++#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS) ++ ++static inline int branchperms(const struct super_block *sb, int index) ++{ ++ BUG_ON(index < 0); ++ return UNIONFS_SB(sb)->data[index].branchperms; ++} ++ ++static inline int set_branchperms(struct super_block *sb, int index, int perms) ++{ ++ BUG_ON(index < 0); ++ UNIONFS_SB(sb)->data[index].branchperms = perms; ++ return perms; ++} ++ ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_super(const struct super_block *sb, int index) ++{ ++ int ret; ++ ++ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0; ++ return ret; ++} ++ ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_idx(const struct dentry *dentry, int index) ++{ ++ struct super_block *lower_sb; ++ ++ BUG_ON(index < 0); ++ ++ if (!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ++ return -EROFS; ++ ++ lower_sb = unionfs_lower_super_idx(dentry->d_sb, index); ++ BUG_ON(lower_sb == NULL); ++ /* ++ * test sb flags directly, not IS_RDONLY(lower_inode) because the ++ * lower_dentry could be a negative. ++ */ ++ if (lower_sb->s_flags & MS_RDONLY) ++ return -EROFS; ++ ++ return 0; ++} ++ ++static inline int is_robranch(const struct dentry *dentry) ++{ ++ int index; ++ ++ index = UNIONFS_D(dentry)->bstart; ++ BUG_ON(index < 0); ++ ++ return is_robranch_idx(dentry, index); ++} ++ ++/* ++ * EXTERNALS: ++ */ ++extern int check_branch(struct nameidata *nd); ++extern int parse_branch_mode(const char *name, int *perms); ++ ++/* locking helpers */ ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); ++ return dir; ++} ++static inline struct dentry *lock_parent_wh(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ ++ mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT); ++ return dir; ++} ++ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ mutex_unlock(&dir->d_inode->i_mutex); ++ dput(dir); ++} ++ ++static inline struct vfsmount *unionfs_mntget(struct dentry *dentry, ++ int bindex) ++{ ++ struct vfsmount *mnt; ++ ++ BUG_ON(!dentry || bindex < 0); ++ ++ mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex)); ++#ifdef CONFIG_UNION_FS_DEBUG ++ if (!mnt) ++ pr_debug("unionfs: mntget: mnt=%p bindex=%d\n", ++ mnt, bindex); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ ++ return mnt; ++} ++ ++static inline void unionfs_mntput(struct dentry *dentry, int bindex) ++{ ++ struct vfsmount *mnt; ++ ++ if (!dentry && bindex < 0) ++ return; ++ BUG_ON(!dentry || bindex < 0); ++ ++ mnt = unionfs_lower_mnt_idx(dentry, bindex); ++#ifdef CONFIG_UNION_FS_DEBUG ++ /* ++ * Directories can have NULL lower objects in between start/end, but ++ * NOT if at the start/end range. We cannot verify that this dentry ++ * is a type=DIR, because it may already be a negative dentry. But ++ * if dbstart is greater than dbend, we know that this couldn't have ++ * been a regular file: it had to have been a directory. ++ */ ++ if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry))) ++ pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ mntput(mnt); ++} ++ ++#ifdef CONFIG_UNION_FS_DEBUG ++ ++/* useful for tracking code reachability */ ++#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) ++ ++#define unionfs_check_inode(i) __unionfs_check_inode((i), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_file(f) __unionfs_check_file((f), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_nd(n) __unionfs_check_nd((n), \ ++ __FILE__, __func__, __LINE__) ++#define show_branch_counts(sb) __show_branch_counts((sb), \ ++ __FILE__, __func__, __LINE__) ++#define show_inode_times(i) __show_inode_times((i), \ ++ __FILE__, __func__, __LINE__) ++#define show_dinode_times(d) __show_dinode_times((d), \ ++ __FILE__, __func__, __LINE__) ++#define show_inode_counts(i) __show_inode_counts((i), \ ++ __FILE__, __func__, __LINE__) ++ ++extern void __unionfs_check_inode(const struct inode *inode, const char *fname, ++ const char *fxn, int line); ++extern void __unionfs_check_dentry(const struct dentry *dentry, ++ const char *fname, const char *fxn, ++ int line); ++extern void __unionfs_check_file(const struct file *file, ++ const char *fname, const char *fxn, int line); ++extern void __unionfs_check_nd(const struct nameidata *nd, ++ const char *fname, const char *fxn, int line); ++extern void __show_branch_counts(const struct super_block *sb, ++ const char *file, const char *fxn, int line); ++extern void __show_inode_times(const struct inode *inode, ++ const char *file, const char *fxn, int line); ++extern void __show_dinode_times(const struct dentry *dentry, ++ const char *file, const char *fxn, int line); ++extern void __show_inode_counts(const struct inode *inode, ++ const char *file, const char *fxn, int line); ++ ++#else /* not CONFIG_UNION_FS_DEBUG */ ++ ++/* we leave useful hooks for these check functions throughout the code */ ++#define unionfs_check_inode(i) do { } while (0) ++#define unionfs_check_dentry(d) do { } while (0) ++#define unionfs_check_file(f) do { } while (0) ++#define unionfs_check_nd(n) do { } while (0) ++#define show_branch_counts(sb) do { } while (0) ++#define show_inode_times(i) do { } while (0) ++#define show_dinode_times(d) do { } while (0) ++#define show_inode_counts(i) do { } while (0) ++ ++#endif /* not CONFIG_UNION_FS_DEBUG */ ++ ++#endif /* not _UNION_H_ */ +diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c +new file mode 100644 +index 0000000..623f68d +--- /dev/null ++++ b/fs/unionfs/unlink.c +@@ -0,0 +1,277 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Helper function for Unionfs's unlink operation. ++ * ++ * The main goal of this function is to optimize the unlinking of non-dir ++ * objects in unionfs by deleting all possible lower inode objects from the ++ * underlying branches having same dentry name as the non-dir dentry on ++ * which this unlink operation is called. This way we delete as many lower ++ * inodes as possible, and save space. Whiteouts need to be created in ++ * branch0 only if unlinking fails on any of the lower branch other than ++ * branch0, or if a lower branch is marked read-only. ++ * ++ * Also, while unlinking a file, if we encounter any dir type entry in any ++ * intermediate branch, then we remove the directory by calling vfs_rmdir. ++ * The following special cases are also handled: ++ ++ * (1) If an error occurs in branch0 during vfs_unlink, then we return ++ * appropriate error. ++ * ++ * (2) If we get an error during unlink in any of other lower branch other ++ * than branch0, then we create a whiteout in branch0. ++ * ++ * (3) If a whiteout already exists in any intermediate branch, we delete ++ * all possible inodes only up to that branch (this is an "opaqueness" ++ * as as per Documentation/filesystems/unionfs/concepts.txt). ++ * ++ */ ++static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry; ++ int bindex; ++ int err = 0; ++ ++ err = unionfs_partial_lookup(dentry); ++ if (err) ++ goto out; ++ ++ /* trying to unlink all possible valid instances */ ++ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry || !lower_dentry->d_inode) ++ continue; ++ ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ /* avoid destroying the lower inode if the object is in use */ ++ dget(lower_dentry); ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (!err) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ err = vfs_unlink(lower_dir_dentry->d_inode, ++ lower_dentry); ++ else ++ err = vfs_rmdir(lower_dir_dentry->d_inode, ++ lower_dentry); ++ lockdep_on(); ++ } ++ ++ /* if lower object deletion succeeds, update inode's times */ ++ if (!err) ++ unionfs_copy_attr_times(dentry->d_inode); ++ dput(lower_dentry); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ unlock_dir(lower_dir_dentry); ++ ++ if (err) ++ break; ++ } ++ ++ /* ++ * Create the whiteout in branch 0 (highest priority) only if (a) ++ * there was an error in any intermediate branch other than branch 0 ++ * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or ++ * mounted read-only. ++ */ ++ if (err) { ++ if ((bindex == 0) || ++ ((bindex == dbstart(dentry)) && ++ (!IS_COPYUP_ERR(err)))) ++ goto out; ++ else { ++ if (!IS_COPYUP_ERR(err)) ++ pr_debug("unionfs: lower object deletion " ++ "failed in branch:%d\n", bindex); ++ err = create_whiteout(dentry, sbstart(dentry->d_sb)); ++ } ++ } ++ ++out: ++ if (!err) ++ inode_dec_link_count(dentry->d_inode); ++ ++ /* We don't want to leave negative leftover dentries for revalidate. */ ++ if (!err && (dbopaque(dentry) != -1)) ++ update_bstart(dentry); ++ ++ return err; ++} ++ ++int unionfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct inode *inode = dentry->d_inode; ++ int valid; ++ ++ BUG_ON(S_ISDIR(inode->i_mode)); ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT); ++ ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ valid = __unionfs_d_revalidate_one_locked(dentry, NULL, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ err = unionfs_unlink_whiteout(dir, dentry); ++ /* call d_drop so the system "forgets" about us */ ++ if (!err) { ++ unionfs_postcopyup_release(dentry); ++ if (inode->i_nlink == 0) /* drop lower inodes */ ++ iput_lowers_all(inode, false); ++ d_drop(dentry); ++ /* ++ * if unlink/whiteout succeeded, parent dir mtime has ++ * changed ++ */ ++ unionfs_copy_attr_times(dir); ++ } ++ ++out: ++ if (!err) { ++ unionfs_check_dentry(dentry); ++ unionfs_check_inode(dir); ++ } ++ unionfs_unlock_dentry(dentry->d_parent); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ /* Here we need to remove whiteout entries. */ ++ err = delete_whiteouts(dentry, dbstart(dentry), namelist); ++ if (err) ++ goto out; ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ /* avoid destroying the lower inode if the file is in use */ ++ dget(lower_dentry); ++ err = is_robranch(dentry); ++ if (!err) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); ++ lockdep_on(); ++ } ++ dput(lower_dentry); ++ ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode); ++ ++out: ++ if (lower_dir_dentry) ++ unlock_dir(lower_dir_dentry); ++ return err; ++} ++ ++int unionfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct unionfs_dir_state *namelist = NULL; ++ int dstart, dend; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(dentry, &namelist); ++ if (err) ++ goto out; ++ ++ err = unionfs_rmdir_first(dir, dentry, namelist); ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ /* ++ * We create a whiteout for the directory if there was an error to ++ * rmdir the first directory entry in the union. Otherwise, we ++ * create a whiteout only if there is no chance that a lower ++ * priority branch might also have the same named directory. IOW, ++ * if there is not another same-named directory at a lower priority ++ * branch, then we don't need to create a whiteout for it. ++ */ ++ if (!err) { ++ if (dstart < dend) ++ err = create_whiteout(dentry, dstart); ++ } else { ++ int new_err; ++ ++ if (dstart == 0) ++ goto out; ++ ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ ++ new_err = create_whiteout(dentry, dstart - 1); ++ if (new_err != -EEXIST) ++ err = new_err; ++ } ++ ++out: ++ /* ++ * Drop references to lower dentry/inode so storage space for them ++ * can be reclaimed. Then, call d_drop so the system "forgets" ++ * about us. ++ */ ++ if (!err) { ++ iput_lowers_all(dentry->d_inode, false); ++ dput(unionfs_lower_dentry_idx(dentry, dstart)); ++ unionfs_set_lower_dentry_idx(dentry, dstart, NULL); ++ d_drop(dentry); ++ /* update our lower vfsmnts, in case a copyup took place */ ++ unionfs_postcopyup_setmnt(dentry); ++ } ++ ++ if (namelist) ++ free_rdstate(namelist); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/fs/unionfs/whiteout.c b/fs/unionfs/whiteout.c +new file mode 100644 +index 0000000..db7a21e +--- /dev/null ++++ b/fs/unionfs/whiteout.c +@@ -0,0 +1,577 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * whiteout and opaque directory helpers ++ */ ++ ++/* What do we use for whiteouts. */ ++#define UNIONFS_WHPFX ".wh." ++#define UNIONFS_WHLEN 4 ++/* ++ * If a directory contains this file, then it is opaque. We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque" ++#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME ++ ++/* construct whiteout filename */ ++char *alloc_whname(const char *name, int len) ++{ ++ char *buf; ++ ++ buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL); ++ if (unlikely(!buf)) ++ return ERR_PTR(-ENOMEM); ++ ++ strcpy(buf, UNIONFS_WHPFX); ++ strlcat(buf, name, len + UNIONFS_WHLEN + 1); ++ ++ return buf; ++} ++ ++/* ++ * XXX: this can be inline or CPP macro, but is here to keep all whiteout ++ * code in one place. ++ */ ++void unionfs_set_max_namelen(long *namelen) ++{ ++ *namelen -= UNIONFS_WHLEN; ++} ++ ++/* check if @namep is a whiteout, update @namep and @namelenp accordingly */ ++bool is_whiteout_name(char **namep, int *namelenp) ++{ ++ if (*namelenp > UNIONFS_WHLEN && ++ !strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN)) { ++ *namep += UNIONFS_WHLEN; ++ *namelenp -= UNIONFS_WHLEN; ++ return true; ++ } ++ return false; ++} ++ ++/* is the filename valid == !(whiteout for a file or opaque dir marker) */ ++bool is_validname(const char *name) ++{ ++ if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) ++ return false; ++ if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME, ++ sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1)) ++ return false; ++ return true; ++} ++ ++/* ++ * Look for a whiteout @name in @lower_parent directory. If error, return ++ * ERR_PTR. Caller must dput() the returned dentry if not an error. ++ * ++ * XXX: some callers can reuse the whname allocated buffer to avoid repeated ++ * free then re-malloc calls. Need to provide a different API for those ++ * callers. ++ */ ++struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent) ++{ ++ char *whname = NULL; ++ int err = 0, namelen; ++ struct dentry *wh_dentry = NULL; ++ ++ namelen = strlen(name); ++ whname = alloc_whname(name, namelen); ++ if (unlikely(IS_ERR(whname))) { ++ err = PTR_ERR(whname); ++ goto out; ++ } ++ ++ /* check if whiteout exists in this branch: lookup .wh.foo */ ++ wh_dentry = lookup_one_len(whname, lower_parent, strlen(whname)); ++ if (IS_ERR(wh_dentry)) { ++ err = PTR_ERR(wh_dentry); ++ goto out; ++ } ++ ++ /* check if negative dentry (ENOENT) */ ++ if (!wh_dentry->d_inode) ++ goto out; ++ ++ /* whiteout found: check if valid type */ ++ if (!S_ISREG(wh_dentry->d_inode->i_mode)) { ++ printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n", ++ whname, wh_dentry->d_inode->i_mode); ++ dput(wh_dentry); ++ err = -EIO; ++ goto out; ++ } ++ ++out: ++ kfree(whname); ++ if (err) ++ wh_dentry = ERR_PTR(err); ++ return wh_dentry; ++} ++ ++/* find and return first whiteout in parent directory, else ENOENT */ ++struct dentry *find_first_whiteout(struct dentry *dentry) ++{ ++ int bindex, bstart, bend; ++ struct dentry *parent, *lower_parent, *wh_dentry; ++ ++ parent = dget_parent(dentry); ++ unionfs_lock_dentry(parent, UNIONFS_DMUTEX_WHITEOUT); ++ bstart = dbstart(parent); ++ bend = dbend(parent); ++ wh_dentry = ERR_PTR(-ENOENT); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_parent = unionfs_lower_dentry_idx(parent, bindex); ++ if (!lower_parent) ++ continue; ++ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_parent); ++ if (IS_ERR(wh_dentry)) ++ continue; ++ if (wh_dentry->d_inode) ++ break; ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(-ENOENT); ++ } ++ unionfs_unlock_dentry(parent); ++ dput(parent); ++ ++ return wh_dentry; ++} ++ ++/* ++ * Unlink a whiteout dentry. Returns 0 or -errno. Caller must hold and ++ * release dentry reference. ++ */ ++int unlink_whiteout(struct dentry *wh_dentry) ++{ ++ int err; ++ struct dentry *lower_dir_dentry; ++ ++ /* dget and lock parent dentry */ ++ lower_dir_dentry = lock_parent_wh(wh_dentry); ++ ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry); ++ lockdep_on(); ++ unlock_dir(lower_dir_dentry); ++ ++ /* ++ * Whiteouts are special files and should be deleted no matter what ++ * (as if they never existed), in order to allow this create ++ * operation to succeed. This is especially important in sticky ++ * directories: a whiteout may have been created by one user, but ++ * the newly created file may be created by another user. ++ * Therefore, in order to maintain Unix semantics, if the vfs_unlink ++ * above failed, then we have to try to directly unlink the ++ * whiteout. Note: in the ODF version of unionfs, whiteout are ++ * handled much more cleanly. ++ */ ++ if (err == -EPERM) { ++ struct inode *inode = lower_dir_dentry->d_inode; ++ err = inode->i_op->unlink(inode, wh_dentry); ++ } ++ if (err) ++ printk(KERN_ERR "unionfs: could not unlink whiteout %s, " ++ "err = %d\n", wh_dentry->d_name.name, err); ++ ++ return err; ++ ++} ++ ++/* ++ * Helper function when creating new objects (create, symlink, mknod, etc.). ++ * Checks to see if there's a whiteout in @lower_dentry's parent directory, ++ * whose name is taken from @dentry. Then tries to remove that whiteout, if ++ * found. If <dentry,bindex> is a branch marked readonly, return -EROFS. ++ * If it finds both a regular file and a whiteout, return -EIO (this should ++ * never happen). ++ * ++ * Return 0 if no whiteout was found. Return 1 if one was found and ++ * successfully removed. Therefore a value >= 0 tells the caller that ++ * @lower_dentry belongs to a good branch to create the new object in). ++ * Return -ERRNO if an error occurred during whiteout lookup or in trying to ++ * unlink the whiteout. ++ */ ++int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry, ++ int bindex) ++{ ++ int err; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ /* look for whiteout dentry first */ ++ lower_dir_dentry = dget_parent(lower_dentry); ++ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry); ++ dput(lower_dir_dentry); ++ if (IS_ERR(wh_dentry)) { ++ err = PTR_ERR(wh_dentry); ++ goto out; ++ } ++ ++ if (!wh_dentry->d_inode) { /* no whiteout exists*/ ++ err = 0; ++ goto out_dput; ++ } ++ ++ /* check if regular file and whiteout were both found */ ++ if (unlikely(lower_dentry->d_inode)) { ++ err = -EIO; ++ printk(KERN_ERR "unionfs: found both whiteout and regular " ++ "file in directory %s (branch %d)\n", ++ lower_dentry->d_parent->d_name.name, bindex); ++ goto out_dput; ++ } ++ ++ /* check if branch is writeable */ ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (err) ++ goto out_dput; ++ ++ /* .wh.foo has been found, so let's unlink it */ ++ err = unlink_whiteout(wh_dentry); ++ if (!err) ++ err = 1; /* a whiteout was found and successfully removed */ ++out_dput: ++ dput(wh_dentry); ++out: ++ return err; ++} ++ ++/* ++ * Pass an unionfs dentry and an index. It will try to create a whiteout ++ * for the filename in dentry, and will try in branch 'index'. On error, ++ * it will proceed to a branch to the left. ++ */ ++int create_whiteout(struct dentry *dentry, int start) ++{ ++ int bstart, bend, bindex; ++ struct dentry *lower_dir_dentry; ++ struct dentry *lower_dentry; ++ struct dentry *lower_wh_dentry; ++ struct nameidata nd; ++ char *name = NULL; ++ int err = -EINVAL; ++ ++ verify_locked(dentry); ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ /* create dentry's whiteout equivalent */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (unlikely(IS_ERR(name))) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ for (bindex = start; bindex >= 0; bindex--) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ ++ if (!lower_dentry) { ++ /* ++ * if lower dentry is not present, create the ++ * entire lower dentry directory structure and go ++ * ahead. Since we want to just create whiteout, we ++ * only want the parent dentry, and hence get rid of ++ * this dentry. ++ */ ++ lower_dentry = create_parents(dentry->d_inode, ++ dentry, ++ dentry->d_name.name, ++ bindex); ++ if (!lower_dentry || IS_ERR(lower_dentry)) { ++ int ret = PTR_ERR(lower_dentry); ++ if (!IS_COPYUP_ERR(ret)) ++ printk(KERN_ERR ++ "unionfs: create_parents for " ++ "whiteout failed: bindex=%d " ++ "err=%d\n", bindex, ret); ++ continue; ++ } ++ } ++ ++ lower_wh_dentry = ++ lookup_one_len(name, lower_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(lower_wh_dentry)) ++ continue; ++ ++ /* ++ * The whiteout already exists. This used to be impossible, ++ * but now is possible because of opaqueness. ++ */ ++ if (lower_wh_dentry->d_inode) { ++ dput(lower_wh_dentry); ++ err = 0; ++ goto out; ++ } ++ ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ lower_dir_dentry = lock_parent_wh(lower_wh_dentry); ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (!err) ++ err = vfs_create(lower_dir_dentry->d_inode, ++ lower_wh_dentry, ++ ~current->fs->umask & S_IRUGO, ++ &nd); ++ unlock_dir(lower_dir_dentry); ++ dput(lower_wh_dentry); ++ release_lower_nd(&nd, err); ++ ++ if (!err || !IS_COPYUP_ERR(err)) ++ break; ++ } ++ ++ /* set dbopaque so that lookup will not proceed after this branch */ ++ if (!err) ++ dbopaque(dentry) = bindex; ++ ++out: ++ kfree(name); ++ return err; ++} ++ ++/* ++ * Delete all of the whiteouts in a given directory for rmdir. ++ * ++ * lower directory inode should be locked ++ */ ++static int do_delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err = 0; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *lower_dentry; ++ char *name = NULL, *p; ++ struct inode *lower_dir; ++ int i; ++ struct list_head *pos; ++ struct filldir_node *cursor; ++ ++ /* Find out lower parent dentry */ ++ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode)); ++ lower_dir = lower_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(lower_dir->i_mode)); ++ ++ err = -ENOMEM; ++ name = __getname(); ++ if (unlikely(!name)) ++ goto out; ++ strcpy(name, UNIONFS_WHPFX); ++ p = name + UNIONFS_WHLEN; ++ ++ err = 0; ++ for (i = 0; !err && i < namelist->size; i++) { ++ list_for_each(pos, &namelist->list[i]) { ++ cursor = ++ list_entry(pos, struct filldir_node, ++ file_list); ++ /* Only operate on whiteouts in this branch. */ ++ if (cursor->bindex != bindex) ++ continue; ++ if (!cursor->whiteout) ++ continue; ++ ++ strlcpy(p, cursor->name, PATH_MAX - UNIONFS_WHLEN); ++ lower_dentry = ++ lookup_one_len(name, lower_dir_dentry, ++ cursor->namelen + ++ UNIONFS_WHLEN); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ break; ++ } ++ if (lower_dentry->d_inode) ++ err = vfs_unlink(lower_dir, lower_dentry); ++ dput(lower_dentry); ++ if (err) ++ break; ++ } ++ } ++ ++ __putname(name); ++ ++ /* After all of the removals, we should copy the attributes once. */ ++ fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode); ++ ++out: ++ return err; ++} ++ ++ ++void __delete_whiteouts(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct deletewh_args *d = &args->deletewh; ++ ++ args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist); ++ complete(&args->comp); ++} ++ ++/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */ ++int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *lower_dir_dentry; ++ struct inode *lower_dir; ++ struct sioq_args args; ++ ++ sb = dentry->d_sb; ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ BUG_ON(bindex < dbstart(dentry)); ++ BUG_ON(bindex > dbend(dentry)); ++ err = is_robranch_super(sb, bindex); ++ if (err) ++ goto out; ++ ++ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode)); ++ lower_dir = lower_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(lower_dir->i_mode)); ++ ++ if (!inode_permission(lower_dir, MAY_WRITE | MAY_EXEC)) { ++ err = do_delete_whiteouts(dentry, bindex, namelist); ++ } else { ++ args.deletewh.namelist = namelist; ++ args.deletewh.dentry = dentry; ++ args.deletewh.bindex = bindex; ++ run_sioq(__delete_whiteouts, &args); ++ err = args.err; ++ } ++ ++out: ++ return err; ++} ++ ++/**************************************************************************** ++ * Opaque directory helpers * ++ ****************************************************************************/ ++ ++/* ++ * is_opaque_dir: returns 0 if it is NOT an opaque dir, 1 if it is, and ++ * -errno if an error occurred trying to figure this out. ++ */ ++int is_opaque_dir(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *wh_lower_dentry; ++ struct inode *lower_inode; ++ struct sioq_args args; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_inode = lower_dentry->d_inode; ++ ++ BUG_ON(!S_ISDIR(lower_inode->i_mode)); ++ ++ mutex_lock(&lower_inode->i_mutex); ++ ++ if (!inode_permission(lower_inode, MAY_EXEC)) { ++ wh_lower_dentry = ++ lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ } else { ++ args.is_opaque.dentry = lower_dentry; ++ run_sioq(__is_opaque_dir, &args); ++ wh_lower_dentry = args.ret; ++ } ++ ++ mutex_unlock(&lower_inode->i_mutex); ++ ++ if (IS_ERR(wh_lower_dentry)) { ++ err = PTR_ERR(wh_lower_dentry); ++ goto out; ++ } ++ ++ /* This is an opaque dir iff wh_lower_dentry is positive */ ++ err = !!wh_lower_dentry->d_inode; ++ ++ dput(wh_lower_dentry); ++out: ++ return err; ++} ++ ++void __is_opaque_dir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ ++ args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ complete(&args->comp); ++} ++ ++int make_dir_opaque(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_dentry, *diropq; ++ struct inode *lower_dir; ++ struct nameidata nd; ++ kernel_cap_t orig_cap; ++ ++ /* ++ * Opaque directory whiteout markers are special files (like regular ++ * whiteouts), and should appear to the users as if they don't ++ * exist. They should be created/deleted regardless of directory ++ * search/create permissions, but only for the duration of this ++ * creation of the .wh.__dir_opaque: file. Note, this does not ++ * circumvent normal ->permission). ++ */ ++ orig_cap = current->cap_effective; ++ cap_raise(current->cap_effective, CAP_DAC_READ_SEARCH); ++ cap_raise(current->cap_effective, CAP_DAC_OVERRIDE); ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_dir = lower_dentry->d_inode; ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) || ++ !S_ISDIR(lower_dir->i_mode)); ++ ++ mutex_lock(&lower_dir->i_mutex); ++ diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ if (IS_ERR(diropq)) { ++ err = PTR_ERR(diropq); ++ goto out; ++ } ++ ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ if (!diropq->d_inode) ++ err = vfs_create(lower_dir, diropq, S_IRUGO, &nd); ++ if (!err) ++ dbopaque(dentry) = bindex; ++ release_lower_nd(&nd, err); ++ ++ dput(diropq); ++ ++out: ++ mutex_unlock(&lower_dir->i_mutex); ++ current->cap_effective = orig_cap; ++ return err; ++} +diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c +new file mode 100644 +index 0000000..93a8fce +--- /dev/null ++++ b/fs/unionfs/xattr.c +@@ -0,0 +1,153 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This is lifted from fs/xattr.c */ ++void *unionfs_xattr_alloc(size_t size, size_t limit) ++{ ++ void *ptr; ++ ++ if (size > limit) ++ return ERR_PTR(-E2BIG); ++ ++ if (!size) /* size request, no buffer is needed */ ++ return NULL; ++ ++ ptr = kmalloc(size, GFP_KERNEL); ++ if (unlikely(!ptr)) ++ return ERR_PTR(-ENOMEM); ++ return ptr; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value, ++ size_t size) ++{ ++ struct dentry *lower_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_getxattr(lower_dentry, (char *) name, value, size); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ struct dentry *lower_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_setxattr(lower_dentry, (char *) name, (void *) value, ++ size, flags); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct dentry *lower_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_removexattr(lower_dentry, (char *) name); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size) ++{ ++ struct dentry *lower_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ char *encoded_list = NULL; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ encoded_list = list; ++ err = vfs_listxattr(lower_dentry, encoded_list, size); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h +index bb516ce..6615a52 100644 +--- a/include/linux/fs_stack.h ++++ b/include/linux/fs_stack.h +@@ -1,17 +1,27 @@ ++/* ++ * Copyright (c) 2006-2007 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2007 Stony Brook University ++ * Copyright (c) 2006-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #ifndef _LINUX_FS_STACK_H + #define _LINUX_FS_STACK_H + +-/* This file defines generic functions used primarily by stackable ++/* ++ * This file defines generic functions used primarily by stackable + * filesystems; none of these functions require i_mutex to be held. + */ + + #include <linux/fs.h> + + /* externs for fs/stack.c */ +-extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, +- int (*get_nlinks)(struct inode *)); +- +-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src); ++extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src); ++extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src); + + /* inlines */ + static inline void fsstack_copy_attr_atime(struct inode *dest, +diff --git a/include/linux/magic.h b/include/linux/magic.h +index 1fa0c2c..67043ed 100644 +--- a/include/linux/magic.h ++++ b/include/linux/magic.h +@@ -35,6 +35,8 @@ + #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + ++#define UNIONFS_SUPER_MAGIC 0xf15f083d ++ + #define SMB_SUPER_MAGIC 0x517B + #define USBDEVICE_SUPER_MAGIC 0x9fa2 + #define CGROUP_SUPER_MAGIC 0x27e0eb +diff --git a/include/linux/splice.h b/include/linux/splice.h +index 528dcb9..4b5727c 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -70,5 +70,10 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *, + struct splice_pipe_desc *); + extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, + splice_direct_actor *); ++extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++extern long vfs_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); + + #endif +diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h +new file mode 100644 +index 0000000..bc15a16 +--- /dev/null ++++ b/include/linux/union_fs.h +@@ -0,0 +1,22 @@ ++/* ++ * Copyright (c) 2003-2008 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2003-2008 Stony Brook University ++ * Copyright (c) 2003-2008 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _LINUX_UNION_FS_H ++#define _LINUX_UNION_FS_H ++ ++/* ++ * DEFINITIONS FOR USER AND KERNEL CODE: ++ */ ++# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int) ++# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int) ++ ++#endif /* _LINUX_UNIONFS_H */ ++ diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0011_workaround_unidef_step.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0011_workaround_unidef_step.patch new file mode 100644 index 000000000..a77ff3c62 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0011_workaround_unidef_step.patch @@ -0,0 +1,10 @@ +--- linux.trees.git/include/linux/videodev2.h.org 2008-08-15 16:31:03.000000000 -0700 ++++ linux.trees.git/include/linux/videodev2.h 2008-08-15 16:31:11.000000000 -0700 +@@ -59,7 +59,6 @@ + #include <linux/time.h> /* need struct timeval */ + #include <linux/compiler.h> /* need __user */ + #else +-#define __user + #include <sys/time.h> + #endif + #include <linux/ioctl.h> diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/0012_intelfb_945gme.patch b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0012_intelfb_945gme.patch new file mode 100644 index 000000000..15ebe5632 --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/0012_intelfb_945gme.patch @@ -0,0 +1,153 @@ +The following patch adds support for Intel's 945GME graphics chip to +the intelfb driver. I have assumed that the 945GME is identical to the +already-supported 945GM apart from its PCI IDs; this is based on a quick +look at the X driver for these chips which seems to treat them +identically. + +Signed-off-by: Phil Endecott <spam_from_intelfb@chezphil.org> + +--- + +The 945GME is used in the ASUS Eee 901, and I coded this in the hope that +I'd be able to use it to get a console at the native 1024x600 resolution +which is not known to the BIOS. I realised too late that the intelfb +driver does not support mode changing on laptops, so it won't be any +use for me. But rather than throw it away I will post it here as +essentially "untested"; maybe someone who knows more about this driver, +and with more useful hardware to test on, can pick it up. + +diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt +index 27a3160..dd9e944 100644 +--- a/Documentation/fb/intelfb.txt ++++ b/Documentation/fb/intelfb.txt +@@ -14,6 +14,7 @@ graphics devices. These would include: + Intel 915GM + Intel 945G + Intel 945GM ++ Intel 945GME + Intel 965G + Intel 965GM + +diff --git a/drivers/video/intelfb/intelfb.h b/drivers/video/intelfb/intelfb.h +index 3325fbd..a50bea6 100644 +--- a/drivers/video/intelfb/intelfb.h ++++ b/drivers/video/intelfb/intelfb.h +@@ -12,9 +12,9 @@ + #endif + + /*** Version/name ***/ +-#define INTELFB_VERSION "0.9.5" ++#define INTELFB_VERSION "0.9.6" + #define INTELFB_MODULE_NAME "intelfb" +-#define SUPPORTED_CHIPSETS "830M/845G/852GM/855GM/865G/915G/915GM/945G/945GM/965G/965GM" ++#define SUPPORTED_CHIPSETS "830M/845G/852GM/855GM/865G/915G/915GM/945G/945GM/945GME/965G/965GM" + + + /*** Debug/feature defines ***/ +@@ -58,6 +58,7 @@ + #define PCI_DEVICE_ID_INTEL_915GM 0x2592 + #define PCI_DEVICE_ID_INTEL_945G 0x2772 + #define PCI_DEVICE_ID_INTEL_945GM 0x27A2 ++#define PCI_DEVICE_ID_INTEL_945GME 0x27AE + #define PCI_DEVICE_ID_INTEL_965G 0x29A2 + #define PCI_DEVICE_ID_INTEL_965GM 0x2A02 + +@@ -160,6 +161,7 @@ enum intel_chips { + INTEL_915GM, + INTEL_945G, + INTEL_945GM, ++ INTEL_945GME, + INTEL_965G, + INTEL_965GM, + }; +@@ -363,6 +365,7 @@ struct intelfb_info { + ((dinfo)->chipset == INTEL_915GM) || \ + ((dinfo)->chipset == INTEL_945G) || \ + ((dinfo)->chipset == INTEL_945GM) || \ ++ ((dinfo)->chipset == INTEL_945GME) || \ + ((dinfo)->chipset == INTEL_965G) || \ + ((dinfo)->chipset == INTEL_965GM)) + +diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c +index fcf9fad..5d896b8 100644 +--- a/drivers/video/intelfb/intelfb_i2c.c ++++ b/drivers/video/intelfb/intelfb_i2c.c +@@ -171,6 +171,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo) + /* has some LVDS + tv-out */ + case INTEL_945G: + case INTEL_945GM: ++ case INTEL_945GME: + case INTEL_965G: + case INTEL_965GM: + /* SDVO ports have a single control bus - 2 devices */ +diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c +index e44303f..a09e236 100644 +--- a/drivers/video/intelfb/intelfbdrv.c ++++ b/drivers/video/intelfb/intelfbdrv.c +@@ -2,7 +2,7 @@ + * intelfb + * + * Linux framebuffer driver for Intel(R) 830M/845G/852GM/855GM/865G/915G/915GM/ +- * 945G/945GM/965G/965GM integrated graphics chips. ++ * 945G/945GM/945GME/965G/965GM integrated graphics chips. + * + * Copyright © 2002, 2003 David Dawes <dawes@xfree86.org> + * 2004 Sylvain Meyer +@@ -102,6 +102,9 @@ + * + * 04/2008 - Version 0.9.5 + * Add support for 965G/965GM. (Maik Broemme <mbroemme@plusserver.de>) ++ * ++ * 08/2008 - Version 0.9.6 ++ * Add support for 945GME. (Phil Endecott <spam_from_intelfb@chezphil.org>) + */ + + #include <linux/module.h> +@@ -183,6 +186,7 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = { + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945G }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945GM }, ++ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945GME, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945GME }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_965G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_965G }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_965GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_965GM }, + { 0, } +@@ -555,6 +559,7 @@ static int __devinit intelfb_pci_register(struct pci_dev *pdev, + (ent->device == PCI_DEVICE_ID_INTEL_915GM) || + (ent->device == PCI_DEVICE_ID_INTEL_945G) || + (ent->device == PCI_DEVICE_ID_INTEL_945GM) || ++ (ent->device == PCI_DEVICE_ID_INTEL_945GME) || + (ent->device == PCI_DEVICE_ID_INTEL_965G) || + (ent->device == PCI_DEVICE_ID_INTEL_965GM)) { + +diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c +index 8e6d6a4..8b26b27 100644 +--- a/drivers/video/intelfb/intelfbhw.c ++++ b/drivers/video/intelfb/intelfbhw.c +@@ -143,6 +143,12 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo) + dinfo->mobile = 1; + dinfo->pll_index = PLLS_I9xx; + return 0; ++ case PCI_DEVICE_ID_INTEL_945GME: ++ dinfo->name = "Intel(R) 945GME"; ++ dinfo->chipset = INTEL_945GME; ++ dinfo->mobile = 1; ++ dinfo->pll_index = PLLS_I9xx; ++ return 0; + case PCI_DEVICE_ID_INTEL_965G: + dinfo->name = "Intel(R) 965G"; + dinfo->chipset = INTEL_965G; +@@ -186,6 +192,7 @@ int intelfbhw_get_memory(struct pci_dev *pdev, int *aperture_size, + case PCI_DEVICE_ID_INTEL_915GM: + case PCI_DEVICE_ID_INTEL_945G: + case PCI_DEVICE_ID_INTEL_945GM: ++ case PCI_DEVICE_ID_INTEL_945GME: + case PCI_DEVICE_ID_INTEL_965G: + case PCI_DEVICE_ID_INTEL_965GM: + /* 915, 945 and 965 chipsets support a 256MB aperture. + + +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/
\ No newline at end of file diff --git a/meta/packages/linux/linux-moblin2-2.6.27-rc1/defconfig-eee901 b/meta/packages/linux/linux-moblin2-2.6.27-rc1/defconfig-eee901 new file mode 100644 index 000000000..489bfce8f --- /dev/null +++ b/meta/packages/linux/linux-moblin2-2.6.27-rc1/defconfig-eee901 @@ -0,0 +1,2322 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.27-rc1 +# Wed Sep 10 12:10:52 2008 +# +# CONFIG_64BIT is not set +CONFIG_X86_32=y +# CONFIG_X86_64 is not set +CONFIG_X86=y +CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +# CONFIG_GENERIC_TIME_VSYSCALL is not set +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_ZONE_DMA32 is not set +CONFIG_ARCH_POPULATES_NODE_MAP=y +# CONFIG_AUDIT_ARCH is not set +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_32_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="-eee901" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_CGROUPS is not set +CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y +# CONFIG_GROUP_SCHED is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_RELAY=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +CONFIG_USER_NS=y +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_PCSPKR_PLATFORM=y +# CONFIG_COMPAT_BRK is not set +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +# CONFIG_MARKERS is not set +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +CONFIG_HAVE_IOREMAP_PROT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_ARCH_TRACEHOOK is not set +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_HAVE_CLK is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +# CONFIG_LBD is not set +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_LSF is not set +CONFIG_BLK_DEV_BSG=y +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y + +# +# Processor type and features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_VSMP is not set +# CONFIG_X86_RDC321X is not set +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +# CONFIG_PARAVIRT_GUEST is not set +# CONFIG_MEMTEST is not set +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +CONFIG_M686=y +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +# CONFIG_MPSC is not set +# CONFIG_MCORE2 is not set +# CONFIG_GENERIC_CPU is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CPU=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_XADD=y +# CONFIG_X86_PPRO_FENCE is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=6 +CONFIG_X86_DEBUGCTLMSR=y +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_DMI=y +# CONFIG_IOMMU_HELPER is not set +CONFIG_NR_CPUS=2 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_MCE=y +# CONFIG_X86_MCE_NONFATAL is not set +# CONFIG_X86_MCE_P4THERMAL is not set +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_OLD_INTERFACE=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_NEED_NODE_MEMMAP_SIZE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_GET_USER_PAGES_FAST=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_RESOURCES_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +# CONFIG_MTRR_SANITIZER is not set +# CONFIG_X86_PAT is not set +# CONFIG_EFI is not set +CONFIG_IRQBALANCE=y +# CONFIG_SECCOMP is not set +# CONFIG_HZ_100 is not set +# CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +# CONFIG_KEXEC_JUMP is not set +CONFIG_PHYSICAL_START=0x400000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options +# +CONFIG_PM=y +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +# CONFIG_PM_TEST_SUSPEND is not set +CONFIG_SUSPEND_FREEZER=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" +CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=m +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_VIDEO=y +CONFIG_ACPI_FAN=y +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_THERMAL=y +CONFIG_ACPI_WMI=m +CONFIG_ACPI_ASUS=y +# CONFIG_ACPI_TOSHIBA is not set +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_EC=y +# CONFIG_ACPI_PCI_SLOT is not set +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +CONFIG_ACPI_CONTAINER=y +CONFIG_ACPI_SBS=m +# CONFIG_APM is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_STAT_DETAILS=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K6 is not set +# CONFIG_X86_POWERNOW_K7 is not set +# CONFIG_X86_POWERNOW_K8 is not set +# CONFIG_X86_GX_SUSPMOD is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_SPEEDSTEP_ICH is not set +# CONFIG_X86_SPEEDSTEP_SMI is not set +# CONFIG_X86_P4_CLOCKMOD is not set +# CONFIG_X86_CPUFREQ_NFORCE2 is not set +# CONFIG_X86_LONGRUN is not set +# CONFIG_X86_LONGHAUL is not set +# CONFIG_X86_E_POWERSAVER is not set + +# +# shared options +# +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set +# CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y + +# +# Bus options (PCI etc.) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +# CONFIG_PCI_GOOLPC is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_DOMAINS=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCIEAER=y +CONFIG_PCIEASPM=y +# CONFIG_PCIEASPM_DEBUG is not set +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +CONFIG_PCI_LEGACY=y +# CONFIG_PCI_DEBUG is not set +CONFIG_HT_IRQ=y +CONFIG_ISA_DMA_API=y +# CONFIG_ISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set +# CONFIG_OLPC is not set +CONFIG_K8_NB=y +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats / Emulations +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +CONFIG_BINFMT_MISC=y + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_MIGRATE=y +CONFIG_XFRM_STATISTICS=y +CONFIG_XFRM_IPCOMP=m +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_TUNNEL=m +CONFIG_INET_TUNNEL=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_LRO=y +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_TUNNEL=m +CONFIG_INET6_TUNNEL=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CT_PROTO_DCCP is not set +CONFIG_NF_CT_PROTO_GRE=m +CONFIG_NF_CT_PROTO_SCTP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_RATEEST=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +# CONFIG_NETFILTER_XT_MATCH_DCCP is not set +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_NF_NAT_SNMP_BASIC=m +CONFIG_NF_NAT_PROTO_GRE=m +CONFIG_NF_NAT_PROTO_UDPLITE=m +CONFIG_NF_NAT_PROTO_SCTP=m +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +CONFIG_NF_NAT_AMANDA=m +CONFIG_NF_NAT_PPTP=m +CONFIG_NF_NAT_H323=m +CONFIG_NF_NAT_SIP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_RAW=m +# CONFIG_IP_NF_SECURITY is not set +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_RAW=m +# CONFIG_IP6_NF_SECURITY is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set +CONFIG_NET_CLS_ROUTE=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +CONFIG_BT=m +CONFIG_BT_L2CAP=m +CONFIG_BT_SCO=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +# CONFIG_BT_BNEP_MC_FILTER is not set +# CONFIG_BT_BNEP_PROTO_FILTER is not set +# CONFIG_BT_HIDP is not set + +# +# Bluetooth device drivers +# +CONFIG_BT_HCIUSB=m +CONFIG_BT_HCIUSB_SCO=y +CONFIG_BT_HCIBTSDIO=m +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_LL=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBPA10X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIVHCI=m +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y + +# +# Wireless +# +CONFIG_CFG80211=m +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +# CONFIG_WIRELESS_EXT_SYSFS is not set +CONFIG_MAC80211=m + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_PID=y +CONFIG_MAC80211_RC_DEFAULT_PID=y +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_MESH=y +CONFIG_MAC80211_LEDS=y +CONFIG_MAC80211_DEBUGFS=y +# CONFIG_MAC80211_DEBUG_MENU is not set +CONFIG_IEEE80211=m +# CONFIG_IEEE80211_DEBUG is not set +CONFIG_IEEE80211_CRYPT_WEP=m +CONFIG_IEEE80211_CRYPT_CCMP=m +CONFIG_IEEE80211_CRYPT_TKIP=m +CONFIG_RFKILL=m +CONFIG_RFKILL_INPUT=m +CONFIG_RFKILL_LEDS=y +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +CONFIG_DEBUG_DEVRES=y +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=262144 +# CONFIG_BLK_DEV_XIP is not set +CONFIG_CDROM_PKTCDVD=m +CONFIG_CDROM_PKTCDVD_BUFFERS=8 +# CONFIG_CDROM_PKTCDVD_WCACHE is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +CONFIG_EEPROM_93CX6=m +# CONFIG_SGI_IOC4 is not set +CONFIG_TIFM_CORE=m +CONFIG_TIFM_7XX1=m +# CONFIG_ACER_WMI is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_HP_WMI is not set +# CONFIG_MSI_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +# CONFIG_CHR_DEV_SG is not set +CONFIG_CHR_DEV_SCH=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_DH is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_ACPI=y +# CONFIG_SATA_PMP is not set +CONFIG_SATA_AHCI=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CS5536 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +CONFIG_PATA_SCH=y +# CONFIG_MD is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# + +# +# Enable only one of the two stacks, unless you know what you are doing +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +CONFIG_MACVLAN=m +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_NET_SB1000 is not set +# CONFIG_ARCNET is not set +CONFIG_PHYLIB=m + +# +# MII PHY device drivers +# +CONFIG_MARVELL_PHY=m +CONFIG_DAVICOM_PHY=m +CONFIG_QSEMI_PHY=m +CONFIG_LXT_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_VITESSE_PHY=m +CONFIG_SMSC_PHY=m +CONFIG_BROADCOM_PHY=m +CONFIG_ICPLUS_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_MDIO_BITBANG=m +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +CONFIG_HAPPYMEAL=m +CONFIG_SUNGEM=m +CONFIG_CASSINI=m +CONFIG_NET_VENDOR_3COM=y +# CONFIG_VORTEX is not set +# CONFIG_TYPHOON is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_NET_PCI is not set +# CONFIG_B44 is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_E1000E is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set +# CONFIG_QLA3XXX is not set +CONFIG_ATL1=m +CONFIG_ATL1E=m +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +CONFIG_WLAN_PRE80211=y +# CONFIG_STRIP is not set +CONFIG_WLAN_80211=y +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +CONFIG_USB_ZD1201=m +CONFIG_USB_NET_RNDIS_WLAN=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m +# CONFIG_ADM8211 is not set +# CONFIG_MAC80211_HWSIM is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +CONFIG_IWLWIFI_RFKILL=y +CONFIG_IWL4965=m +# CONFIG_IWL4965_LEDS is not set +# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWL5000=y +CONFIG_IWL3945=m +CONFIG_IWL3945_RFKILL=y +# CONFIG_IWL3945_SPECTRUM_MEASUREMENT is not set +# CONFIG_IWL3945_LEDS is not set +# CONFIG_IWL3945_DEBUG is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2X00_LIB_RFKILL=y +CONFIG_RT2X00_LIB_LEDS=y +CONFIG_RT2400PCI=m +CONFIG_RT2400PCI_RFKILL=y +CONFIG_RT2400PCI_LEDS=y +CONFIG_RT2500PCI=m +CONFIG_RT2500PCI_RFKILL=y +CONFIG_RT2500PCI_LEDS=y +CONFIG_RT61PCI=m +CONFIG_RT61PCI_RFKILL=y +CONFIG_RT61PCI_LEDS=y +CONFIG_RT2500USB=m +CONFIG_RT2500USB_LEDS=y +CONFIG_RT73USB=m +CONFIG_RT73USB_LEDS=y +CONFIG_RT2X00_LIB_DEBUGFS=y +# CONFIG_RT2X00_DEBUG is not set + +# +# USB Network Adapters +# +CONFIG_USB_CATC=m +CONFIG_USB_KAWETH=m +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_USBNET=m +CONFIG_USB_NET_AX8817X=m +# CONFIG_USB_HSO is not set +CONFIG_USB_NET_CDCETHER=m +CONFIG_USB_NET_DM9601=m +CONFIG_USB_NET_GL620A=m +CONFIG_USB_NET_NET1080=m +CONFIG_USB_NET_PLUSB=m +CONFIG_USB_NET_MCS7830=m +CONFIG_USB_NET_RNDIS_HOST=m +CONFIG_USB_NET_CDC_SUBSET=m +CONFIG_USB_ALI_M5632=y +CONFIG_USB_AN2720=y +CONFIG_USB_BELKIN=y +CONFIG_USB_ARMLINUX=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_KC2190=y +CONFIG_USB_NET_ZAURUS=m +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +# CONFIG_PPP_BSDCOMP is not set +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPPOL2TP=m +# CONFIG_SLIP is not set +CONFIG_SLHC=m +CONFIG_NET_FC=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL=y +CONFIG_NETPOLL_TRAP=y +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=m + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +CONFIG_MOUSE_SERIAL=m +# CONFIG_MOUSE_APPLETOUCH is not set +CONFIG_MOUSE_VSXXXAA=m +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +# CONFIG_INPUT_TABLET is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_FUJITSU=m +CONFIG_TOUCHSCREEN_GUNZE=m +CONFIG_TOUCHSCREEN_ELO=m +CONFIG_TOUCHSCREEN_MTOUCH=m +CONFIG_TOUCHSCREEN_INEXIO=m +CONFIG_TOUCHSCREEN_MK712=m +CONFIG_TOUCHSCREEN_PENMOUNT=m +CONFIG_TOUCHSCREEN_TOUCHRIGHT=m +CONFIG_TOUCHSCREEN_TOUCHWIN=m +CONFIG_TOUCHSCREEN_UCB1400=m +# CONFIG_TOUCHSCREEN_WM97XX is not set +CONFIG_TOUCHSCREEN_USB_COMPOSITE=m +CONFIG_TOUCHSCREEN_USB_EGALAX=y +CONFIG_TOUCHSCREEN_USB_PANJIT=y +CONFIG_TOUCHSCREEN_USB_3M=y +CONFIG_TOUCHSCREEN_USB_ITM=y +CONFIG_TOUCHSCREEN_USB_ETURBO=y +CONFIG_TOUCHSCREEN_USB_GUNZE=y +CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y +CONFIG_TOUCHSCREEN_USB_IRTOUCH=y +CONFIG_TOUCHSCREEN_USB_IDEALTEK=y +CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y +CONFIG_TOUCHSCREEN_USB_GOTOP=y +CONFIG_TOUCHSCREEN_TOUCHIT213=m +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_WISTRON_BTNS is not set +CONFIG_INPUT_ATLAS_BTNS=m +CONFIG_INPUT_ATI_REMOTE=m +CONFIG_INPUT_ATI_REMOTE2=m +CONFIG_INPUT_KEYSPAN_REMOTE=m +CONFIG_INPUT_POWERMATE=m +CONFIG_INPUT_YEALINK=m +CONFIG_INPUT_UINPUT=m + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIO_RAW=m +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_DEVKMEM is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set +CONFIG_FIX_EARLYCON_MEM=y + +# +# Non-8250 serial port support +# +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +# CONFIG_RAW_DRIVER is not set +CONFIG_HPET=y +# CONFIG_HPET_MMAP is not set +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set +CONFIG_I2C_ALGOBIT=y + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Graphics adapter I2C/DDC channel drivers +# +# CONFIG_I2C_VOODOO3 is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_SCx200_ACB is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_AT24 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_SPI is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set +# CONFIG_W1 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set +# CONFIG_HWMON is not set +CONFIG_THERMAL=y +# CONFIG_WATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_V4L2_COMMON=m +# CONFIG_VIDEO_ALLOW_V4L1 is not set +CONFIG_VIDEO_V4L1_COMPAT=y +CONFIG_DVB_CORE=m +CONFIG_VIDEO_MEDIA=m + +# +# Multimedia drivers +# +# CONFIG_MEDIA_ATTACH is not set +CONFIG_MEDIA_TUNER=m +# CONFIG_MEDIA_TUNER_CUSTOMIZE is not set +CONFIG_MEDIA_TUNER_SIMPLE=m +CONFIG_MEDIA_TUNER_TDA8290=m +CONFIG_MEDIA_TUNER_TDA9887=m +CONFIG_MEDIA_TUNER_TEA5761=m +CONFIG_MEDIA_TUNER_TEA5767=m +CONFIG_MEDIA_TUNER_MT20XX=m +CONFIG_MEDIA_TUNER_XC2028=m +CONFIG_MEDIA_TUNER_XC5000=m +CONFIG_VIDEO_V4L2=m +# CONFIG_VIDEO_CAPTURE_DRIVERS is not set +# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_DVB_CAPTURE_DRIVERS is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +CONFIG_AGP=y +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +CONFIG_DRM_I810=y +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +CONFIG_VGASTATE=y +CONFIG_VIDEO_OUTPUT_CONTROL=y +CONFIG_FB=y +CONFIG_FIRMWARE_EDID=y +CONFIG_FB_DDC=y +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +CONFIG_FB_VGA16=y +CONFIG_FB_UVESA=y +CONFIG_FB_VESA=y +CONFIG_FB_EFI=y +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_I810 is not set +# CONFIG_FB_LE80578 is not set +CONFIG_FB_INTEL=y +CONFIG_FB_INTEL_DEBUG=y +CONFIG_FB_INTEL_I2C=y +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_CYBLA is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set +CONFIG_BACKLIGHT_MBP_NVIDIA=y + +# +# Display device support +# +CONFIG_DISPLAY_SUPPORT=y + +# +# Display hardware drivers +# + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 +CONFIG_VIDEO_SELECT=y +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +# CONFIG_LOGO is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +# CONFIG_SND_MIXER_OSS is not set +# CONFIG_SND_PCM_OSS is not set +# CONFIG_SND_SEQUENCER_OSS is not set +CONFIG_SND_DYNAMIC_MINORS=y +# CONFIG_SND_SUPPORT_OLD_API is not set +CONFIG_SND_VERBOSE_PROCFS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DEBUG=y +# CONFIG_SND_DEBUG_VERBOSE is not set +CONFIG_SND_PCM_XRUN_DEBUG=y +CONFIG_SND_VMASTER=y +CONFIG_SND_AC97_CODEC=y +CONFIG_SND_DRIVERS=y +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set +CONFIG_SND_AC97_POWER_SAVE=y +CONFIG_SND_AC97_POWER_SAVE_DEFAULT=5 +CONFIG_SND_PCI=y +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +CONFIG_SND_INTEL8X0=y +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SIS7019 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set +CONFIG_SND_USB=y +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_USB_USX2Y=m +CONFIG_SND_USB_CAIAQ=m +CONFIG_SND_USB_CAIAQ_INPUT=y +# CONFIG_SND_SOC is not set +# CONFIG_SOUND_PRIME is not set +CONFIG_AC97_BUS=y +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_DEVICE_CLASS is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_SUSPEND=y +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_TT_NEWSCHED=y +CONFIG_USB_ISP116X_HCD=m +# CONFIG_USB_ISP1760_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set +# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_U132_HCD=m +CONFIG_USB_SL811_HCD=m +# CONFIG_USB_R8A66597_HCD is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +# CONFIG_USB_WDM is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_USBAT=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y +CONFIG_USB_STORAGE_ALAUDA=y +# CONFIG_USB_STORAGE_ONETOUCH is not set +CONFIG_USB_STORAGE_KARMA=y +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +CONFIG_USB_MDC800=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_MON=y + +# +# USB port drivers +# +CONFIG_USB_SERIAL=m +CONFIG_USB_EZUSB=y +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_AIRCABLE=m +CONFIG_USB_SERIAL_ARK3116=m +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_CH341=m +CONFIG_USB_SERIAL_WHITEHEAT=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_CP2101=m +CONFIG_USB_SERIAL_CYPRESS_M8=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_FUNSOFT=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_GARMIN=m +CONFIG_USB_SERIAL_IPW=m +CONFIG_USB_SERIAL_IUU=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA28=y +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +CONFIG_USB_SERIAL_KEYSPAN_USA19=y +CONFIG_USB_SERIAL_KEYSPAN_USA18X=y +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_MOS7720=m +CONFIG_USB_SERIAL_MOS7840=m +# CONFIG_USB_SERIAL_MOTOROLA is not set +CONFIG_USB_SERIAL_NAVMAN=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_OTI6858=m +# CONFIG_USB_SERIAL_SPCP8X5 is not set +CONFIG_USB_SERIAL_HP4X=m +CONFIG_USB_SERIAL_SAFE=m +CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_SIERRAWIRELESS=m +CONFIG_USB_SERIAL_TI=m +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OPTION=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_SERIAL_DEBUG=m + +# +# USB Miscellaneous drivers +# +CONFIG_USB_EMI62=m +CONFIG_USB_EMI26=m +CONFIG_USB_ADUTUX=m +CONFIG_USB_AUERSWALD=m +# CONFIG_USB_RIO500 is not set +CONFIG_USB_LEGOTOWER=m +CONFIG_USB_LCD=m +CONFIG_USB_BERRY_CHARGE=m +CONFIG_USB_LED=m +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +CONFIG_USB_PHIDGET=m +CONFIG_USB_PHIDGETKIT=m +CONFIG_USB_PHIDGETMOTORCONTROL=m +CONFIG_USB_PHIDGETSERVO=m +CONFIG_USB_IDMOUSE=m +CONFIG_USB_FTDI_ELAN=m +CONFIG_USB_APPLEDISPLAY=m +CONFIG_USB_SISUSBVGA=m +CONFIG_USB_SISUSBVGA_CON=y +CONFIG_USB_LD=m +CONFIG_USB_TRANCEVIBRATOR=m +CONFIG_USB_IOWARRIOR=m +# CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_GADGET is not set +CONFIG_MMC=m +# CONFIG_MMC_DEBUG is not set +# CONFIG_MMC_UNSAFE_RESUME is not set + +# +# MMC/SD Card Drivers +# +CONFIG_MMC_BLOCK=m +CONFIG_MMC_BLOCK_BOUNCE=y +CONFIG_SDIO_UART=m +# CONFIG_MMC_TEST is not set + +# +# MMC/SD Host Controller Drivers +# +CONFIG_MMC_SDHCI=m +# CONFIG_MMC_SDHCI_PCI is not set +CONFIG_MMC_WBSD=m +CONFIG_MMC_TIFM_SD=m +CONFIG_MEMSTICK=m +CONFIG_MEMSTICK_DEBUG=y + +# +# MemoryStick drivers +# +# CONFIG_MEMSTICK_UNSAFE_RESUME is not set +CONFIG_MSPRO_BLOCK=m + +# +# MemoryStick Host Controller Drivers +# +# CONFIG_MEMSTICK_TIFM_MS is not set +# CONFIG_MEMSTICK_JMICRON_38X is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m + +# +# LED drivers +# +# CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_CLEVO_MAIL is not set +# CONFIG_LEDS_PCA955X is not set + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set + +# +# SPI RTC drivers +# + +# +# Platform RTC drivers +# +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +# CONFIG_DMADEVICES is not set +# CONFIG_UIO is not set + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +CONFIG_FIRMWARE_MEMMAP=y +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +# CONFIG_DMIID is not set +# CONFIG_ISCSI_IBFT_FIND is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +CONFIG_DNOTIFY=y +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +CONFIG_FUSE_FS=m +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_UDF_NLS=y + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_CONFIGFS_FS=m + +# +# Layered filesystems +# +# CONFIG_ECRYPT_FS is not set +CONFIG_UNION_FS=y +# CONFIG_UNION_FS_XATTR is not set +# CONFIG_UNION_FS_DEBUG is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +CONFIG_SQUASHFS=y +# CONFIG_SQUASHFS_EMBEDDED is not set +CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_SCHED_DEBUG=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_LIST=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +CONFIG_BOOT_PRINTK_DELAY=y +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_FAULT_INJECTION is not set +CONFIG_LATENCYTOP=y +CONFIG_HAVE_FTRACE=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_TRACING=y +# CONFIG_FTRACE is not set +# CONFIG_IRQSOFF_TRACER is not set +CONFIG_SYSPROF_TRACER=y +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_X86_VERBOSE_BOOTUP=y +CONFIG_EARLY_PRINTK=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +CONFIG_X86_PTDUMP=y +CONFIG_DEBUG_RODATA=y +# CONFIG_DEBUG_RODATA_TEST is not set +# CONFIG_DEBUG_NX_TEST is not set +# CONFIG_4KSTACKS is not set +CONFIG_DOUBLEFAULT=y +# CONFIG_MMIOTRACE is not set +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set +# CONFIG_OPTIMIZE_INLINING is not set + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +# CONFIG_SECURITY_SELINUX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_GF128MUL=m +CONFIG_CRYPTO_NULL=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_SEQIV=m + +# +# Block modes +# +CONFIG_CRYPTO_CBC=m +CONFIG_CRYPTO_CTR=m +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_AES_586 is not set +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +# CONFIG_CRYPTO_SALSA20_586 is not set +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m +# CONFIG_CRYPTO_TWOFISH_586 is not set + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +# CONFIG_VIRTUALIZATION is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_AUDIT_GENERIC=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/meta/packages/linux/linux-moblin2.inc b/meta/packages/linux/linux-moblin2.inc new file mode 100644 index 000000000..802bde26e --- /dev/null +++ b/meta/packages/linux/linux-moblin2.inc @@ -0,0 +1,18 @@ +DESCRIPTION = "2.6 Linux Development Kernel for moblin2 platforms" +SECTION = "kernel" +LICENSE = "GPL" + +inherit kernel + +do_configure() { + + rm -f ${S}/.config || true + + cp ${WORKDIR}/defconfig-${MACHINE} ${S}/.config + + yes '' | oe_runmake oldconfig + +} + + +COMPATIBLE_MACHINE = "eee901"
\ No newline at end of file diff --git a/meta/packages/linux/linux-moblin2_2.6.27-rc1.bb b/meta/packages/linux/linux-moblin2_2.6.27-rc1.bb new file mode 100644 index 000000000..9e457607a --- /dev/null +++ b/meta/packages/linux/linux-moblin2_2.6.27-rc1.bb @@ -0,0 +1,23 @@ +require linux-moblin2.inc + +PR = "r0" + +DEFAULT_PREFERENCE_eee901 = "1" + +SRC_URI = "${KERNELORG_MIRROR}pub/linux/kernel/v2.6/linux-2.6.26.tar.bz2 \ + ${KERNELORG_MIRROR}pub/linux/kernel/v2.6/testing/patch-2.6.27-rc1.bz2;patch=1 \ + file://0001_Export_shmem_file_setup_for_DRM-GEM.patch;patch=1 \ + file://0002_i915.Use_more_consistent_names_for_regs.patch;patch=1 \ + file://0003_i915.Add_support_for_MSI_and_interrupt_mitigation.patch;patch=1 \ + file://0004_i915.Track_progress_inside_of_batchbuffers_for_determining_wedgedness.patch;patch=1 \ + file://0005_i915.remove_settable_use_mi_batchbuffer_start.patch;patch=1 \ + file://0006_i915.Ignore_X_server_provided_mmio_address.patch;patch=1 \ + file://0007_i915.Initialize_hardware_status_page_at_device_load_when_possible.patch;patch=1 \ + file://0008_drm.Add_GEM_graphics_execution_manager_to_i915_driver.patch;patch=1 \ + file://0009-squashfs3.3-2.6.27.patch;patch=1 \ + file://0010_unionfs-2.4_for_2.6.27-rc1.patch;patch=1 \ + file://0011_workaround_unidef_step.patch;patch=1 \ + file://0012_intelfb_945gme.patch;patch=1 \ + file://defconfig-eee901" + +S = "${WORKDIR}/linux-2.6.26"
\ No newline at end of file |